diff -Nru python-biopython-1.62/Bio/Affy/CelFile.py python-biopython-1.63/Bio/Affy/CelFile.py
--- python-biopython-1.62/Bio/Affy/CelFile.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Affy/CelFile.py 2013-12-05 14:10:43.000000000 +0000
@@ -58,7 +58,8 @@
if "=" in line:
continue
words = line.split()
- y, x = map(int, words[:2])
+ y = int(words[0])
+ x = int(words[1])
record.intensities[x, y] = float(words[2])
record.stdevs[x, y] = float(words[3])
record.npix[x, y] = int(words[4])
diff -Nru python-biopython-1.62/Bio/Affy/__init__.py python-biopython-1.63/Bio/Affy/__init__.py
--- python-biopython-1.62/Bio/Affy/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Affy/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,2 +1,7 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Deal with Affymetrix related data such as cel files.
"""
diff -Nru python-biopython-1.62/Bio/Align/AlignInfo.py python-biopython-1.63/Bio/Align/AlignInfo.py
--- python-biopython-1.62/Bio/Align/AlignInfo.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Align/AlignInfo.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Extract information from alignment objects.
In order to try and avoid huge alignment objects with tons of functions,
@@ -9,11 +14,11 @@
o PSSM
"""
-# standard library
+from __future__ import print_function
+
import math
import sys
-# biopython modules
from Bio import Alphabet
from Bio.Alphabet import IUPAC
from Bio.Seq import Seq
@@ -255,8 +260,8 @@
rep_dict = self._pair_replacement(
self.alignment._records[rec_num1].seq,
self.alignment._records[rec_num2].seq,
- self.alignment._records[rec_num1].annotations.get('weight',1.0),
- self.alignment._records[rec_num2].annotations.get('weight',1.0),
+ self.alignment._records[rec_num1].annotations.get('weight', 1.0),
+ self.alignment._records[rec_num2].annotations.get('weight', 1.0),
rep_dict, skip_items)
return rep_dict
@@ -316,8 +321,7 @@
#Note the built in set does not have a union_update
#which was provided by the sets module's Set
set_letters = set_letters.union(record.seq)
- list_letters = list(set_letters)
- list_letters.sort()
+ list_letters = sorted(set_letters)
all_letters = "".join(list_letters)
return all_letters
@@ -342,7 +346,7 @@
# and drop it out
if isinstance(self.alignment._alphabet, Alphabet.Gapped):
skip_items.append(self.alignment._alphabet.gap_char)
- all_letters = all_letters.replace(self.alignment._alphabet.gap_char,'')
+ all_letters = all_letters.replace(self.alignment._alphabet.gap_char, '')
# now create the dictionary
for first_letter in all_letters:
@@ -499,7 +503,7 @@
info_content[residue_num] = column_score
# sum up the score
- total_info = sum(info_content.itervalues())
+ total_info = sum(info_content.values())
# fill in the ic_vector member: holds IC for each column
for i in info_content:
self.ic_vector[i] = info_content[i]
@@ -528,7 +532,7 @@
for record in all_records:
try:
if record.seq[residue_num] not in to_ignore:
- weight = record.annotations.get('weight',1.0)
+ weight = record.annotations.get('weight', 1.0)
freq_info[record.seq[residue_num]] += weight
total_count += weight
# getting a key error means we've got a problem with the alphabet
@@ -575,8 +579,8 @@
if (key != gap_char and key not in e_freq_table):
raise ValueError("Expected frequency letters %s "
"do not match observed %s"
- % (e_freq_table.keys(),
- obs_freq.keys() - [gap_char]))
+ % (list(e_freq_table.keys()),
+ list(obs_freq.keys()) - [gap_char]))
total_info = 0.0
@@ -598,7 +602,7 @@
total_info += letter_info
return total_info
- def get_column(self,col):
+ def get_column(self, col):
return self.alignment.get_column(col)
@@ -647,8 +651,7 @@
def __str__(self):
out = " "
- all_residues = self.pssm[0][1].keys()
- all_residues.sort()
+ all_residues = sorted(self.pssm[0][1])
# first print out the top header
for res in all_residues:
@@ -677,14 +680,12 @@
if not summary_info.ic_vector:
summary_info.information_content()
rep_sequence = summary_info.alignment._records[rep_record].seq
- positions = summary_info.ic_vector.keys()
- positions.sort()
- for pos in positions:
+ for pos in sorted(summary_info.ic_vector):
fout.write("%d %s %.3f\n" % (pos, rep_sequence[pos],
summary_info.ic_vector[pos]))
if __name__ == "__main__":
- print "Quick test"
+ print("Quick test")
from Bio import AlignIO
from Bio.Align.Generic import Alignment
@@ -696,41 +697,41 @@
alignment = AlignIO.read(open(filename), format)
for record in alignment:
- print str(record.seq)
- print "="*alignment.get_alignment_length()
+ print(str(record.seq))
+ print("="*alignment.get_alignment_length())
summary = SummaryInfo(alignment)
consensus = summary.dumb_consensus(ambiguous="N")
- print consensus
+ print(consensus)
consensus = summary.gap_consensus(ambiguous="N")
- print consensus
- print
- print summary.pos_specific_score_matrix(chars_to_ignore=['-'],
- axis_seq=consensus)
- print
+ print(consensus)
+ print("")
+ print(summary.pos_specific_score_matrix(chars_to_ignore=['-'],
+ axis_seq=consensus))
+ print("")
#Have a generic alphabet, without a declared gap char, so must tell
#provide the frequencies and chars to ignore explicitly.
- print summary.information_content(e_freq_table=expected,
- chars_to_ignore=['-'])
- print
- print "Trying a protein sequence with gaps and stops"
+ print(summary.information_content(e_freq_table=expected,
+ chars_to_ignore=['-']))
+ print("")
+ print("Trying a protein sequence with gaps and stops")
alpha = Alphabet.HasStopCodon(Alphabet.Gapped(Alphabet.generic_protein, "-"), "*")
a = Alignment(alpha)
a.add_sequence("ID001", "MHQAIFIYQIGYP*LKSGYIQSIRSPEYDNW-")
a.add_sequence("ID002", "MH--IFIYQIGYAYLKSGYIQSIRSPEY-NW*")
a.add_sequence("ID003", "MHQAIFIYQIGYPYLKSGYIQSIRSPEYDNW*")
- print a
- print "="*a.get_alignment_length()
+ print(a)
+ print("="*a.get_alignment_length())
s = SummaryInfo(a)
c = s.dumb_consensus(ambiguous="X")
- print c
+ print(c)
c = s.gap_consensus(ambiguous="X")
- print c
- print
- print s.pos_specific_score_matrix(chars_to_ignore=['-', '*'], axis_seq=c)
+ print(c)
+ print("")
+ print(s.pos_specific_score_matrix(chars_to_ignore=['-', '*'], axis_seq=c))
- print s.information_content(chars_to_ignore=['-', '*'])
+ print(s.information_content(chars_to_ignore=['-', '*']))
- print "Done"
+ print("Done")
diff -Nru python-biopython-1.62/Bio/Align/Applications/_ClustalOmega.py python-biopython-1.63/Bio/Align/Applications/_ClustalOmega.py
--- python-biopython-1.62/Bio/Align/Applications/_ClustalOmega.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Align/Applications/_ClustalOmega.py 2013-12-05 14:10:43.000000000 +0000
@@ -11,6 +11,8 @@
"""Command line wrapper for the multiple alignment program Clustal Omega.
"""
+from __future__ import print_function
+
__docformat__ = "epytext en" # Don't just use plain text in epydoc API pages!
from Bio.Application import _Option, _Switch, AbstractCommandline
@@ -27,7 +29,7 @@
>>> in_file = "unaligned.fasta"
>>> out_file = "aligned.fasta"
>>> clustalomega_cline = ClustalOmegaCommandline(infile=in_file, outfile=out_file, verbose=True, auto=True)
- >>> print clustalomega_cline
+ >>> print(clustalomega_cline)
clustalo -i unaligned.fasta -o aligned.fasta --auto -v
@@ -199,10 +201,10 @@
def _test():
"""Run the module's doctests (PRIVATE)."""
- print "Running ClustalOmega doctests..."
+ print("Running ClustalOmega doctests...")
import doctest
doctest.testmod()
- print "Done"
+ print("Done")
if __name__ == "__main__":
_test()
diff -Nru python-biopython-1.62/Bio/Align/Applications/_Clustalw.py python-biopython-1.63/Bio/Align/Applications/_Clustalw.py
--- python-biopython-1.62/Bio/Align/Applications/_Clustalw.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Align/Applications/_Clustalw.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,6 +5,8 @@
"""Command line wrapper for the multiple alignment program Clustal W.
"""
+from __future__ import print_function
+
__docformat__ = "epytext en" # Don't just use plain text in epydoc API pages!
import os
@@ -21,7 +23,7 @@
>>> from Bio.Align.Applications import ClustalwCommandline
>>> in_file = "unaligned.fasta"
>>> clustalw_cline = ClustalwCommandline("clustalw2", infile=in_file)
- >>> print clustalw_cline
+ >>> print(clustalw_cline)
clustalw2 -infile=unaligned.fasta
You would typically run the command line with clustalw_cline() or via
@@ -142,7 +144,7 @@
_Option(["-score", "-SCORE", "SCORE", "score"],
"Either: PERCENT or ABSOLUTE",
checker_function=lambda x: x in ["percent", "PERCENT",
- "absolute","ABSOLUTE"]),
+ "absolute", "ABSOLUTE"]),
# ***Slow Pairwise Alignments:***
_Option(["-pwmatrix", "-PWMATRIX", "PWMATRIX", "pwmatrix"],
"Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
@@ -328,10 +330,10 @@
def _test():
"""Run the module's doctests (PRIVATE)."""
- print "Running ClustalW doctests..."
+ print("Running ClustalW doctests...")
import doctest
doctest.testmod()
- print "Done"
+ print("Done")
if __name__ == "__main__":
_test()
diff -Nru python-biopython-1.62/Bio/Align/Applications/_Dialign.py python-biopython-1.63/Bio/Align/Applications/_Dialign.py
--- python-biopython-1.62/Bio/Align/Applications/_Dialign.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Align/Applications/_Dialign.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,6 +5,8 @@
"""Command line wrapper for the multiple alignment program DIALIGN2-2.
"""
+from __future__ import print_function
+
__docformat__ = "epytext en" # Don't just use plain text in epydoc API pages!
from Bio.Application import _Option, _Argument, _Switch, AbstractCommandline
@@ -23,7 +25,7 @@
>>> from Bio.Align.Applications import DialignCommandline
>>> dialign_cline = DialignCommandline(input="unaligned.fasta",
... fn="aligned", fa=True)
- >>> print dialign_cline
+ >>> print(dialign_cline)
dialign2-2 -fa -fn aligned unaligned.fasta
You would typically run the command line with dialign_cline() or via
@@ -157,7 +159,7 @@
"Maximum number of `*' characters indicating degree "
"of local similarity among sequences. By default, no "
"stars are used but numbers between 0 and 9, instead.",
- checker_function = lambda x: x in range(0,10),
+ checker_function = lambda x: x in range(0, 10),
equate=False),
_Switch(["-stdo", "stdo"],
"Results written to standard output."),
@@ -182,10 +184,10 @@
def _test():
"""Run the module's doctests (PRIVATE)."""
- print "Running modules doctests..."
+ print("Running modules doctests...")
import doctest
doctest.testmod()
- print "Done"
+ print("Done")
if __name__ == "__main__":
_test()
diff -Nru python-biopython-1.62/Bio/Align/Applications/_MSAProbs.py python-biopython-1.63/Bio/Align/Applications/_MSAProbs.py
--- python-biopython-1.62/Bio/Align/Applications/_MSAProbs.py 1970-01-01 00:00:00.000000000 +0000
+++ python-biopython-1.63/Bio/Align/Applications/_MSAProbs.py 2013-12-05 14:10:43.000000000 +0000
@@ -0,0 +1,86 @@
+# Copyright 2013 by Christian Brueffer. All rights reserved.
+#
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+"""Command line wrapper for the multiple sequence alignment program MSAProbs.
+"""
+
+from __future__ import print_function
+
+__docformat__ = "epytext en" # Don't just use plain text in epydoc API pages!
+
+from Bio.Application import _Argument, _Option, _Switch, AbstractCommandline
+
+
+class MSAProbsCommandline(AbstractCommandline):
+ """Command line wrapper for MSAProbs.
+
+ http://msaprobs.sourceforge.net
+
+ Example:
+
+ >>> from Bio.Align.Applications import MSAProbsCommandline
+ >>> in_file = "unaligned.fasta"
+ >>> out_file = "aligned.cla"
+ >>> cline = MSAProbsCommandline(infile=in_file, outfile=out_file, clustalw=True)
+ >>> print(cline)
+ msaprobs -o aligned.cla -clustalw unaligned.fasta
+
+ You would typically run the command line with cline() or via
+ the Python subprocess module, as described in the Biopython tutorial.
+
+ Citation:
+
+ Yongchao Liu, Bertil Schmidt, Douglas L. Maskell: "MSAProbs: multiple
+ sequence alignment based on pair hidden Markov models and partition
+ function posterior probabilities". Bioinformatics, 2010, 26(16): 1958 -1964
+
+ Last checked against version: 0.9.7
+ """
+
+ def __init__(self, cmd="msaprobs", **kwargs):
+ # order of parameters is the same as in msaprobs -help
+ self.parameters = \
+ [
+ _Option(["-o", "--outfile", "outfile"],
+ "specify the output file name (STDOUT by default)",
+ filename=True,
+ equate=False),
+ _Option(["-num_threads", "numthreads"],
+ "specify the number of threads used, and otherwise detect automatically",
+ checker_function=lambda x: isinstance(x, int)),
+ _Switch(["-clustalw", "clustalw"],
+ "use CLUSTALW output format instead of FASTA format"),
+ _Option(["-c", "consistency"],
+ "use 0 <= REPS <= 5 (default: 2) passes of consistency transformation",
+ checker_function=lambda x: isinstance(x, int) and 0 <= x <= 5),
+ _Option(["-ir", "--iterative-refinement", "iterative_refinement"],
+ "use 0 <= REPS <= 1000 (default: 10) passes of iterative-refinement",
+ checker_function=lambda x: isinstance(x, int) and 0 <= x <= 1000),
+ _Switch(["-v", "verbose"],
+ "report progress while aligning (default: off)"),
+ _Option(["-annot", "annot"],
+ "write annotation for multiple alignment to FILENAME",
+ filename=True),
+ _Switch(["-a", "--alignment-order", "alignment_order"],
+ "print sequences in alignment order rather than input order (default: off)"),
+ _Option(["-version", "version"],
+ "print out version of MSAPROBS"),
+ _Argument(["infile"],
+ "Multiple sequence input file",
+ filename=True),
+ ]
+ AbstractCommandline.__init__(self, cmd, **kwargs)
+
+
+def _test():
+ """Run the module's doctests (PRIVATE)."""
+ print("Running MSAProbs doctests...")
+ import doctest
+ doctest.testmod()
+ print("Done")
+
+
+if __name__ == "__main__":
+ _test()
diff -Nru python-biopython-1.62/Bio/Align/Applications/_Mafft.py python-biopython-1.63/Bio/Align/Applications/_Mafft.py
--- python-biopython-1.62/Bio/Align/Applications/_Mafft.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Align/Applications/_Mafft.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,6 +5,8 @@
"""Command line wrapper for the multiple alignment programme MAFFT.
"""
+from __future__ import print_function
+
__docformat__ = "epytext en" # Don't just use plain text in epydoc API pages!
import os
@@ -22,7 +24,7 @@
>>> mafft_exe = "/opt/local/mafft"
>>> in_file = "../Doc/examples/opuntia.fasta"
>>> mafft_cline = MafftCommandline(mafft_exe, input=in_file)
- >>> print mafft_cline
+ >>> print(mafft_cline)
/opt/local/mafft ../Doc/examples/opuntia.fasta
If the mafft binary is on the path (typically the case on a Unix style
@@ -31,7 +33,7 @@
>>> from Bio.Align.Applications import MafftCommandline
>>> in_file = "../Doc/examples/opuntia.fasta"
>>> mafft_cline = MafftCommandline(input=in_file)
- >>> print mafft_cline
+ >>> print(mafft_cline)
mafft ../Doc/examples/opuntia.fasta
You would typically run the command line with mafft_cline() or via
@@ -40,9 +42,8 @@
want to save to a file and then parse, e.g.::
stdout, stderr = mafft_cline()
- handle = open("aligned.fasta", "w")
- handle.write(stdout)
- handle.close()
+ with open("aligned.fasta", "w") as handle:
+ handle.write(stdout)
from Bio import AlignIO
align = AlignIO.read("aligned.fasta", "fasta")
@@ -78,7 +79,7 @@
Last checked against version: MAFFT v6.717b (2009/12/03)
"""
def __init__(self, cmd="mafft", **kwargs):
- BLOSUM_MATRICES = ["30","45","62","80"]
+ BLOSUM_MATRICES = ["30", "45", "62", "80"]
self.parameters = \
[
#**** Algorithm ****
@@ -367,3 +368,4 @@
if __name__ == "__main__":
from Bio._utils import run_doctest
run_doctest()
+
diff -Nru python-biopython-1.62/Bio/Align/Applications/_Muscle.py python-biopython-1.63/Bio/Align/Applications/_Muscle.py
--- python-biopython-1.62/Bio/Align/Applications/_Muscle.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Align/Applications/_Muscle.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,6 +5,8 @@
"""Command line wrapper for the multiple alignment program MUSCLE.
"""
+from __future__ import print_function
+
__docformat__ = "epytext en" # Don't just use plain text in epydoc API pages!
from Bio.Application import _Option, _Switch, AbstractCommandline
@@ -22,7 +24,7 @@
>>> in_file = r"C:\My Documents\unaligned.fasta"
>>> out_file = r"C:\My Documents\aligned.fasta"
>>> muscle_cline = MuscleCommandline(muscle_exe, input=in_file, out=out_file)
- >>> print muscle_cline
+ >>> print(muscle_cline)
"C:\Program Files\Aligments\muscle3.8.31_i86win32.exe" -in "C:\My Documents\unaligned.fasta" -out "C:\My Documents\aligned.fasta"
You would typically run the command line with muscle_cline() or via
@@ -354,10 +356,10 @@
"Write PHYLIP interleaved output to specified filename",
filename=True,
equate=False),
- _Option(["-physout", "physout"],"Write PHYLIP sequential format to specified filename",
+ _Option(["-physout", "physout"], "Write PHYLIP sequential format to specified filename",
filename=True,
equate=False),
- _Option(["-htmlout", "htmlout"],"Write HTML output to specified filename",
+ _Option(["-htmlout", "htmlout"], "Write HTML output to specified filename",
filename=True,
equate=False),
_Option(["-clwout", "clwout"],
@@ -467,10 +469,10 @@
def _test():
"""Run the module's doctests (PRIVATE)."""
- print "Running MUSCLE doctests..."
+ print("Running MUSCLE doctests...")
import doctest
doctest.testmod()
- print "Done"
+ print("Done")
if __name__ == "__main__":
_test()
diff -Nru python-biopython-1.62/Bio/Align/Applications/_Prank.py python-biopython-1.63/Bio/Align/Applications/_Prank.py
--- python-biopython-1.62/Bio/Align/Applications/_Prank.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Align/Applications/_Prank.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,6 +5,8 @@
"""Command line wrapper for the multiple alignment program PRANK.
"""
+from __future__ import print_function
+
__docformat__ = "epytext en" # Don't just use plain text in epydoc API pages!
from Bio.Application import _Option, _Switch, AbstractCommandline
@@ -27,7 +29,7 @@
... o="aligned", #prefix only!
... f=8, #FASTA output
... notree=True, noxml=True)
- >>> print prank_cline
+ >>> print(prank_cline)
prank -d=unaligned.fasta -o=aligned -f=8 -noxml -notree
You would typically run the command line with prank_cline() or via
@@ -205,10 +207,10 @@
def _test():
"""Run the module's doctests (PRIVATE)."""
- print "Running modules doctests..."
+ print("Running modules doctests...")
import doctest
doctest.testmod()
- print "Done"
+ print("Done")
if __name__ == "__main__":
_test()
diff -Nru python-biopython-1.62/Bio/Align/Applications/_Probcons.py python-biopython-1.63/Bio/Align/Applications/_Probcons.py
--- python-biopython-1.62/Bio/Align/Applications/_Probcons.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Align/Applications/_Probcons.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,6 +5,8 @@
"""Command line wrapper for the multiple alignment program PROBCONS.
"""
+from __future__ import print_function
+
__docformat__ = "epytext en" # Don't just use plain text in epydoc API pages!
from Bio.Application import _Option, _Switch, _Argument, AbstractCommandline
@@ -23,7 +25,7 @@
>>> from Bio.Align.Applications import ProbconsCommandline
>>> probcons_cline = ProbconsCommandline(input="unaligned.fasta",
... clustalw=True)
- >>> print probcons_cline
+ >>> print(probcons_cline)
probcons -clustalw unaligned.fasta
You would typically run the command line with probcons_cline() or via
@@ -32,9 +34,8 @@
want to save to a file and then parse, e.g.::
stdout, stderr = probcons_cline()
- handle = open("aligned.aln", "w")
- handle.write(stdout)
- handle.close()
+ with open("aligned.aln", "w") as handle:
+ handle.write(stdout)
from Bio import AlignIO
align = AlignIO.read("aligned.fasta", "clustalw")
@@ -66,16 +67,16 @@
"Use CLUSTALW output format instead of MFA"),
_Option(["-c", "c", "--consistency", "consistency" ],
"Use 0 <= REPS <= 5 (default: 2) passes of consistency transformation",
- checker_function=lambda x: x in range(0,6),
+ checker_function=lambda x: x in range(0, 6),
equate=False),
_Option(["-ir", "--iterative-refinement", "iterative-refinement", "ir"],
"Use 0 <= REPS <= 1000 (default: 100) passes of "
"iterative-refinement",
- checker_function=lambda x: x in range(0,1001),
+ checker_function=lambda x: x in range(0, 1001),
equate=False),
_Option(["-pre", "--pre-training", "pre-training", "pre"],
"Use 0 <= REPS <= 20 (default: 0) rounds of pretraining",
- checker_function=lambda x: x in range(0,21),
+ checker_function=lambda x: x in range(0, 21),
equate=False),
_Switch(["-pairs", "pairs"],
"Generate all-pairs pairwise alignments"),
@@ -111,10 +112,10 @@
def _test():
"""Run the module's doctests (PRIVATE)."""
- print "Running modules doctests..."
+ print("Running modules doctests...")
import doctest
doctest.testmod()
- print "Done"
+ print("Done")
if __name__ == "__main__":
_test()
diff -Nru python-biopython-1.62/Bio/Align/Applications/_TCoffee.py python-biopython-1.63/Bio/Align/Applications/_TCoffee.py
--- python-biopython-1.62/Bio/Align/Applications/_TCoffee.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Align/Applications/_TCoffee.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,6 +5,8 @@
"""Command line wrapper for the multiple alignment program TCOFFEE.
"""
+from __future__ import print_function
+
__docformat__ = "epytext en" # Don't just use plain text in epydoc API pages!
from Bio.Application import _Option, _Switch, AbstractCommandline
@@ -28,7 +30,7 @@
>>> tcoffee_cline = TCoffeeCommandline(infile="unaligned.fasta",
... output="clustalw",
... outfile="aligned.aln")
- >>> print tcoffee_cline
+ >>> print(tcoffee_cline)
t_coffee -output clustalw -infile unaligned.fasta -outfile aligned.aln
You would typically run the command line with tcoffee_cline() or via
@@ -41,7 +43,7 @@
Last checked against: Version_6.92
"""
- SEQ_TYPES = ["dna","protein","dna_protein"]
+ SEQ_TYPES = ["dna", "protein", "dna_protein"]
def __init__(self, cmd="t_coffee", **kwargs):
self.parameters = [
@@ -102,10 +104,10 @@
def _test():
"""Run the module's doctests (PRIVATE)."""
- print "Running modules doctests..."
+ print("Running modules doctests...")
import doctest
doctest.testmod()
- print "Done"
+ print("Done")
if __name__ == "__main__":
_test()
diff -Nru python-biopython-1.62/Bio/Align/Applications/__init__.py python-biopython-1.63/Bio/Align/Applications/__init__.py
--- python-biopython-1.62/Bio/Align/Applications/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Align/Applications/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -6,14 +6,15 @@
__docformat__ = "epytext en" # Don't just use plain text in epydoc API pages!
-from _Muscle import MuscleCommandline
-from _Clustalw import ClustalwCommandline
-from _ClustalOmega import ClustalOmegaCommandline
-from _Prank import PrankCommandline
-from _Mafft import MafftCommandline
-from _Dialign import DialignCommandline
-from _Probcons import ProbconsCommandline
-from _TCoffee import TCoffeeCommandline
+from ._Muscle import MuscleCommandline
+from ._Clustalw import ClustalwCommandline
+from ._ClustalOmega import ClustalOmegaCommandline
+from ._Prank import PrankCommandline
+from ._Mafft import MafftCommandline
+from ._Dialign import DialignCommandline
+from ._Probcons import ProbconsCommandline
+from ._TCoffee import TCoffeeCommandline
+from ._MSAProbs import MSAProbsCommandline
#Make this explicit, then they show up in the API docs
__all__ = ["MuscleCommandline",
@@ -23,5 +24,6 @@
"MafftCommandline",
"DialignCommandline",
"ProbconsCommandline",
- "TCoffeeCommandline"
+ "TCoffeeCommandline",
+ "MSAProbsCommandline",
]
diff -Nru python-biopython-1.62/Bio/Align/Generic.py python-biopython-1.63/Bio/Align/Generic.py
--- python-biopython-1.62/Bio/Align/Generic.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Align/Generic.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,13 +5,16 @@
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
-"""
+"""Classes for generic sequence alignment.
+
Contains classes to deal with generic sequence alignment stuff not
specific to a particular program or format.
Classes:
- Alignment
"""
+from __future__ import print_function
+
__docformat__ = "epytext en" # Don't just use plain text in epydoc API pages!
# biopython
@@ -44,7 +47,7 @@
>>> align.add_sequence("Alpha", "ACTGCTAGCTAG")
>>> align.add_sequence("Beta", "ACT-CTAGCTAG")
>>> align.add_sequence("Gamma", "ACTGCTAGATAG")
- >>> print align
+ >>> print(align)
Gapped(IUPACUnambiguousDNA(), '-') alignment with 3 rows and 12 columns
ACTGCTAGCTAG Alpha
ACT-CTAGCTAG Beta
@@ -84,7 +87,7 @@
>>> align.add_sequence("Alpha", "ACTGCTAGCTAG")
>>> align.add_sequence("Beta", "ACT-CTAGCTAG")
>>> align.add_sequence("Gamma", "ACTGCTAGATAG")
- >>> print align
+ >>> print(align)
Gapped(IUPACUnambiguousDNA(), '-') alignment with 3 rows and 12 columns
ACTGCTAGCTAG Alpha
ACT-CTAGCTAG Beta
@@ -96,9 +99,9 @@
lines = ["%s alignment with %i rows and %i columns"
% (str(self._alphabet), rows, self.get_alignment_length())]
if rows <= 20:
- lines.extend([self._str_line(rec) for rec in self._records])
+ lines.extend(self._str_line(rec) for rec in self._records)
else:
- lines.extend([self._str_line(rec) for rec in self._records[:18]])
+ lines.extend(self._str_line(rec) for rec in self._records[:18])
lines.append("...")
lines.append(self._str_line(self._records[-1]))
return "\n".join(lines)
@@ -141,7 +144,7 @@
>>> align.add_sequence("Alpha", "ACTGCTAGCTAG")
>>> align.add_sequence("Beta", "ACT-CTAGCTAG")
>>> align.add_sequence("Gamma", "ACTGCTAGATAG")
- >>> print align.format("fasta")
+ >>> print(align.format("fasta"))
>Alpha
ACTGCTAGCTAG
>Beta
@@ -149,7 +152,7 @@
>Gamma
ACTGCTAGATAG
- >>> print align.format("phylip")
+ >>> print(align.format("phylip"))
3 12
Alpha ACTGCTAGCT AG
Beta ACT-CTAGCT AG
@@ -170,7 +173,7 @@
string supported by Bio.AlignIO as an output file format.
See also the alignment's format() method."""
if format_spec:
- from StringIO import StringIO
+ from Bio._py3k import StringIO
from Bio import AlignIO
handle = StringIO()
AlignIO.write([self], handle, format_spec)
@@ -208,8 +211,8 @@
>>> align.add_sequence("Beta", "ACT-CTAGCTAG")
>>> align.add_sequence("Gamma", "ACTGCTAGATAG")
>>> for record in align:
- ... print record.id
- ... print record.seq
+ ... print(record.id)
+ ... print(record.seq)
Alpha
ACTGCTAGCTAG
Beta
@@ -330,7 +333,7 @@
self._records.append(new_record)
- def get_column(self,col):
+ def get_column(self, col):
"""Returns a string containing a given column.
e.g.
@@ -363,23 +366,23 @@
>>> align.add_sequence("Beta", "ACT-CTAGCTAG")
>>> align.add_sequence("Gamma", "ACTGCTAGATAG")
>>> align.add_sequence("Delta", "ACTGCTTGCTAG")
- >>> align.add_sequence("Epsilon","ACTGCTTGATAG")
+ >>> align.add_sequence("Epsilon", "ACTGCTTGATAG")
You can access a row of the alignment as a SeqRecord using an integer
index (think of the alignment as a list of SeqRecord objects here):
>>> first_record = align[0]
- >>> print first_record.id, first_record.seq
+ >>> print("%s %s" % (first_record.id, first_record.seq))
Alpha ACTGCTAGCTAG
>>> last_record = align[-1]
- >>> print last_record.id, last_record.seq
+ >>> print("%s %s" % (last_record.id, last_record.seq))
Epsilon ACTGCTTGATAG
You can also access use python's slice notation to create a sub-alignment
containing only some of the SeqRecord objects:
>>> sub_alignment = align[2:5]
- >>> print sub_alignment
+ >>> print(sub_alignment)
Gapped(IUPACUnambiguousDNA(), '-') alignment with 3 rows and 12 columns
ACTGCTAGATAG Gamma
ACTGCTTGCTAG Delta
@@ -389,7 +392,7 @@
can be used to select every second sequence:
>>> sub_alignment = align[::2]
- >>> print sub_alignment
+ >>> print(sub_alignment)
Gapped(IUPACUnambiguousDNA(), '-') alignment with 3 rows and 12 columns
ACTGCTAGCTAG Alpha
ACTGCTAGATAG Gamma
@@ -398,7 +401,7 @@
Or to get a copy of the alignment with the rows in reverse order:
>>> rev_alignment = align[::-1]
- >>> print rev_alignment
+ >>> print(rev_alignment)
Gapped(IUPACUnambiguousDNA(), '-') alignment with 5 rows and 12 columns
ACTGCTTGATAG Epsilon
ACTGCTTGCTAG Delta
@@ -430,10 +433,11 @@
def _test():
"""Run the Bio.Align.Generic module's doctests."""
- print "Running doctests..."
+ print("Running doctests...")
import doctest
doctest.testmod()
- print "Done"
+ print("Done")
if __name__ == "__main__":
_test()
+
diff -Nru python-biopython-1.62/Bio/Align/__init__.py python-biopython-1.63/Bio/Align/__init__.py
--- python-biopython-1.62/Bio/Align/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Align/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -9,6 +9,8 @@
class, used in the Bio.AlignIO module.
"""
+from __future__ import print_function
+
__docformat__ = "epytext en" # Don't just use plain text in epydoc API pages!
from Bio.Seq import Seq
@@ -32,7 +34,7 @@
>>> from Bio import AlignIO
>>> align = AlignIO.read("Clustalw/opuntia.aln", "clustal")
- >>> print align
+ >>> print(align)
SingleLetterAlphabet() alignment with 7 rows and 156 columns
TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273285|gb|AF191659.1|AF191
TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273284|gb|AF191658.1|AF191
@@ -49,7 +51,7 @@
>>> len(align)
7
>>> for record in align:
- ... print record.id, len(record)
+ ... print("%s %i" % (record.id, len(record)))
gi|6273285|gb|AF191659.1|AF191 156
gi|6273284|gb|AF191658.1|AF191 156
gi|6273287|gb|AF191661.1|AF191 156
@@ -60,19 +62,19 @@
You can also access individual rows as SeqRecord objects via their index:
- >>> print align[0].id
+ >>> print(align[0].id)
gi|6273285|gb|AF191659.1|AF191
- >>> print align[-1].id
+ >>> print(align[-1].id)
gi|6273291|gb|AF191665.1|AF191
And extract columns as strings:
- >>> print align[:,1]
+ >>> print(align[:, 1])
AAAAAAA
Or, take just the first ten columns as a sub-alignment:
- >>> print align[:,:10]
+ >>> print(align[:, :10])
SingleLetterAlphabet() alignment with 7 rows and 10 columns
TATACATTAA gi|6273285|gb|AF191659.1|AF191
TATACATTAA gi|6273284|gb|AF191658.1|AF191
@@ -86,7 +88,7 @@
remove a section of the alignment. For example, taking just the first
and last ten columns:
- >>> print align[:,:10] + align[:,-10:]
+ >>> print(align[:, :10] + align[:, -10:])
SingleLetterAlphabet() alignment with 7 rows and 20 columns
TATACATTAAGTGTACCAGA gi|6273285|gb|AF191659.1|AF191
TATACATTAAGTGTACCAGA gi|6273284|gb|AF191658.1|AF191
@@ -130,7 +132,7 @@
>>> b = SeqRecord(Seq("AAA-CGT", generic_dna), id="Beta")
>>> c = SeqRecord(Seq("AAAAGGT", generic_dna), id="Gamma")
>>> align = MultipleSeqAlignment([a, b, c], annotations={"tool": "demo"})
- >>> print align
+ >>> print(align)
DNAAlphabet() alignment with 3 rows and 7 columns
AAAACGT Alpha
AAA-CGT Beta
@@ -204,7 +206,7 @@
First we create a small alignment (three rows):
>>> align = MultipleSeqAlignment([a, b, c])
- >>> print align
+ >>> print(align)
DNAAlphabet() alignment with 3 rows and 7 columns
AAAACGT Alpha
AAA-CGT Beta
@@ -213,7 +215,7 @@
Now we can extend this alignment with another two rows:
>>> align.extend([d, e])
- >>> print align
+ >>> print(align)
DNAAlphabet() alignment with 5 rows and 7 columns
AAAACGT Alpha
AAA-CGT Beta
@@ -232,7 +234,7 @@
#Take the first record's length
records = iter(records) # records arg could be list or iterator
try:
- rec = records.next()
+ rec = next(records)
except StopIteration:
#Special case, no records
return
@@ -252,7 +254,7 @@
>>> from Bio import AlignIO
>>> align = AlignIO.read("Clustalw/opuntia.aln", "clustal")
- >>> print align
+ >>> print(align)
SingleLetterAlphabet() alignment with 7 rows and 156 columns
TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273285|gb|AF191659.1|AF191
TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273284|gb|AF191658.1|AF191
@@ -273,7 +275,7 @@
Now append this to the alignment,
>>> align.append(dummy)
- >>> print align
+ >>> print(align)
SingleLetterAlphabet() alignment with 8 rows and 156 columns
TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273285|gb|AF191659.1|AF191
TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAG...AGA gi|6273284|gb|AF191658.1|AF191
@@ -336,12 +338,12 @@
Now, let's look at these two alignments:
- >>> print left
+ >>> print(left)
DNAAlphabet() alignment with 3 rows and 5 columns
AAAAC Alpha
AAA-C Beta
AAAAG Gamma
- >>> print right
+ >>> print(right)
DNAAlphabet() alignment with 3 rows and 2 columns
GT Alpha
GT Beta
@@ -350,7 +352,7 @@
And add them:
>>> combined = left + right
- >>> print combined
+ >>> print(combined)
DNAAlphabet() alignment with 3 rows and 7 columns
AAAACGT Alpha
AAA-CGT Beta
@@ -385,10 +387,10 @@
raise ValueError("When adding two alignments they must have the same length"
" (i.e. same number or rows)")
alpha = Alphabet._consensus_alphabet([self._alphabet, other._alphabet])
- merged = (left+right for left,right in zip(self, other))
+ merged = (left+right for left, right in zip(self, other))
# Take any common annotation:
annotations = dict()
- for k, v in self.annotations.iteritems():
+ for k, v in self.annotations.items():
if k in other.annotations and other.annotations[k] == v:
annotations[k] = v
return MultipleSeqAlignment(merged, alpha, annotations)
@@ -430,17 +432,17 @@
index (think of the alignment as a list of SeqRecord objects here):
>>> first_record = align[0]
- >>> print first_record.id, first_record.seq
+ >>> print("%s %s" % (first_record.id, first_record.seq))
Alpha AAAACGT
>>> last_record = align[-1]
- >>> print last_record.id, last_record.seq
+ >>> print("%s %s" % (last_record.id, last_record.seq))
Epsilon AAA-GGT
You can also access use python's slice notation to create a sub-alignment
containing only some of the SeqRecord objects:
>>> sub_alignment = align[2:5]
- >>> print sub_alignment
+ >>> print(sub_alignment)
DNAAlphabet() alignment with 3 rows and 7 columns
AAAAGGT Gamma
AAAACGT Delta
@@ -450,7 +452,7 @@
can be used to select every second sequence:
>>> sub_alignment = align[::2]
- >>> print sub_alignment
+ >>> print(sub_alignment)
DNAAlphabet() alignment with 3 rows and 7 columns
AAAACGT Alpha
AAAAGGT Gamma
@@ -459,7 +461,7 @@
Or to get a copy of the alignment with the rows in reverse order:
>>> rev_alignment = align[::-1]
- >>> print rev_alignment
+ >>> print(rev_alignment)
DNAAlphabet() alignment with 5 rows and 7 columns
AAA-GGT Epsilon
AAAACGT Delta
@@ -470,7 +472,7 @@
You can also use two indices to specify both rows and columns. Using simple
integers gives you the entry as a single character string. e.g.
- >>> align[3,4]
+ >>> align[3, 4]
'C'
This is equivalent to:
@@ -485,17 +487,17 @@
To get a single column (as a string) use this syntax:
- >>> align[:,4]
+ >>> align[:, 4]
'CCGCG'
Or, to get part of a column,
- >>> align[1:3,4]
+ >>> align[1:3, 4]
'CG'
However, in general you get a sub-alignment,
- >>> print align[1:5,3:6]
+ >>> print(align[1:5, 3:6])
DNAAlphabet() alignment with 4 rows and 3 columns
-CG Beta
AGG Gamma
@@ -555,7 +557,7 @@
If you simple try and add these without sorting, you get this:
- >>> print align1 + align2
+ >>> print(align1 + align2)
DNAAlphabet() alignment with 3 rows and 8 columns
ACGTCGGT
ACGGCGTT
@@ -568,7 +570,7 @@
>>> align1.sort()
>>> align2.sort()
- >>> print align1 + align2
+ >>> print(align1 + align2)
DNAAlphabet() alignment with 3 rows and 8 columns
ACGCCGCT Chicken
ACGTCGTT Human
@@ -578,13 +580,13 @@
GC content of each sequence.
>>> from Bio.SeqUtils import GC
- >>> print align1
+ >>> print(align1)
DNAAlphabet() alignment with 3 rows and 4 columns
ACGC Chicken
ACGT Human
ACGG Mouse
>>> align1.sort(key = lambda record: GC(record.seq))
- >>> print align1
+ >>> print(align1)
DNAAlphabet() alignment with 3 rows and 4 columns
ACGT Human
ACGC Chicken
@@ -594,7 +596,7 @@
but backwards:
>>> align1.sort(reverse=True)
- >>> print align1
+ >>> print(align1)
DNAAlphabet() alignment with 3 rows and 4 columns
ACGG Mouse
ACGT Human
@@ -646,3 +648,5 @@
if __name__ == "__main__":
from Bio._utils import run_doctest
run_doctest()
+
+
diff -Nru python-biopython-1.62/Bio/AlignIO/ClustalIO.py python-biopython-1.63/Bio/AlignIO/ClustalIO.py
--- python-biopython-1.62/Bio/AlignIO/ClustalIO.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/AlignIO/ClustalIO.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,19 +1,20 @@
-# Copyright 2006-2010 by Peter Cock. All rights reserved.
+# Copyright 2006-2013 by Peter Cock. All rights reserved.
#
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
-"""
-Bio.AlignIO support for the "clustal" output from CLUSTAL W and other tools.
+"""Bio.AlignIO support for "clustal" output from CLUSTAL W and other tools.
You are expected to use this module via the Bio.AlignIO functions (or the
Bio.SeqIO functions if you want to work directly with the gapped sequences).
"""
+from __future__ import print_function
+
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Align import MultipleSeqAlignment
-from Interfaces import AlignmentIterator, SequentialAlignmentWriter
+from .Interfaces import AlignmentIterator, SequentialAlignmentWriter
class ClustalWriter(SequentialAlignmentWriter):
@@ -83,7 +84,7 @@
class ClustalIterator(AlignmentIterator):
"""Clustalw alignment iterator."""
- def next(self):
+ def __next__(self):
handle = self.handle
try:
#Header we saved from when we were parsing
@@ -272,7 +273,7 @@
return alignment
if __name__ == "__main__":
- print "Running a quick self-test"
+ print("Running a quick self-test")
#This is a truncated version of the example in Tests/cw02.aln
#Notice the inclusion of sequence numbers (right hand side)
@@ -343,7 +344,7 @@
"""
- from StringIO import StringIO
+ from Bio._py3k import StringIO
alignments = list(ClustalIterator(StringIO(aln_example1)))
assert 1 == len(alignments)
@@ -371,14 +372,14 @@
"LKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLV"
for alignment in ClustalIterator(StringIO(aln_example2 + aln_example1)):
- print "Alignment with %i records of length %i" \
+ print("Alignment with %i records of length %i" \
% (len(alignment),
- alignment.get_alignment_length())
+ alignment.get_alignment_length()))
- print "Checking empty file..."
+ print("Checking empty file...")
assert 0 == len(list(ClustalIterator(StringIO(""))))
- print "Checking write/read..."
+ print("Checking write/read...")
alignments = list(ClustalIterator(StringIO(aln_example1))) \
+ list(ClustalIterator(StringIO(aln_example2)))*2
handle = StringIO()
@@ -388,7 +389,7 @@
assert a.get_alignment_length() == alignments[i].get_alignment_length()
handle.seek(0)
- print "Testing write/read when there is only one sequence..."
+ print("Testing write/read when there is only one sequence...")
alignment = alignment[0:1]
handle = StringIO()
ClustalWriter(handle).write_file([alignment])
@@ -465,4 +466,4 @@
assert 1 == len(alignments)
assert alignments[0]._version == "2.0.9"
- print "The End"
+ print("The End")
diff -Nru python-biopython-1.62/Bio/AlignIO/EmbossIO.py python-biopython-1.63/Bio/AlignIO/EmbossIO.py
--- python-biopython-1.62/Bio/AlignIO/EmbossIO.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/AlignIO/EmbossIO.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,10 +1,9 @@
-# Copyright 2008-2010 by Peter Cock. All rights reserved.
+# Copyright 2008-2013 by Peter Cock. All rights reserved.
#
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
-"""
-Bio.AlignIO support for the "emboss" alignment output from EMBOSS tools.
+"""Bio.AlignIO support for "emboss" alignment output from EMBOSS tools.
You are expected to use this module via the Bio.AlignIO functions (or the
Bio.SeqIO functions if you want to work directly with the gapped sequences).
@@ -13,10 +12,12 @@
example from the alignret, water and needle tools.
"""
+from __future__ import print_function
+
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Align import MultipleSeqAlignment
-from Interfaces import AlignmentIterator, SequentialAlignmentWriter
+from .Interfaces import AlignmentIterator, SequentialAlignmentWriter
class EmbossWriter(SequentialAlignmentWriter):
@@ -66,7 +67,7 @@
call the "pairs" and "simple" formats.
"""
- def next(self):
+ def __next__(self):
handle = self.handle
@@ -146,7 +147,7 @@
start = int(start) - 1
end = int(end)
else:
- assert seq.replace("-", "") != ""
+ assert seq.replace("-", "") != "", repr(line)
start = int(start) - 1 # python counting
end = int(end)
@@ -164,9 +165,9 @@
if start == end:
assert seq.replace("-", "") == "", line
else:
- assert start - seq_starts[index] == len(seqs[index].replace("-","")), \
+ assert start - seq_starts[index] == len(seqs[index].replace("-", "")), \
"Found %i chars so far for sequence %i (%s, %s), line says start %i:\n%s" \
- % (len(seqs[index].replace("-","")), index, id, repr(seqs[index]),
+ % (len(seqs[index].replace("-", "")), index, id, repr(seqs[index]),
start, line)
seqs[index] += seq
@@ -188,7 +189,7 @@
#Just a spacer?
pass
else:
- print line
+ print(line)
assert False
line = handle.readline()
@@ -221,7 +222,7 @@
if __name__ == "__main__":
- print "Running a quick self-test"
+ print("Running a quick self-test")
#http://emboss.sourceforge.net/docs/themes/alnformats/align.simple
simple_example = \
@@ -579,7 +580,7 @@
#---------------------------------------
#---------------------------------------"""
- from StringIO import StringIO
+ from Bio._py3k import StringIO
alignments = list(EmbossIterator(StringIO(pair_example)))
assert len(alignments) == 1
@@ -616,4 +617,4 @@
assert [r.id for r in alignments[0]] \
== ["asis", "asis"]
- print "Done"
+ print("Done")
diff -Nru python-biopython-1.62/Bio/AlignIO/FastaIO.py python-biopython-1.63/Bio/AlignIO/FastaIO.py
--- python-biopython-1.62/Bio/AlignIO/FastaIO.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/AlignIO/FastaIO.py 2013-12-05 14:10:43.000000000 +0000
@@ -3,8 +3,7 @@
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
-"""
-Bio.AlignIO support for "fasta-m10" output from Bill Pearson's FASTA tools.
+"""Bio.AlignIO support for "fasta-m10" output from Bill Pearson's FASTA tools.
You are expected to use this module via the Bio.AlignIO functions (or the
Bio.SeqIO functions if you want to work directly with the gapped sequences).
@@ -20,6 +19,8 @@
which can also be used to store a multiple sequence alignments.
"""
+from __future__ import print_function
+
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Align import MultipleSeqAlignment
@@ -77,9 +78,9 @@
handle = ...
for a in AlignIO.parse(handle, "fasta-m10"):
assert len(a) == 2, "Should be pairwise!"
- print "Alignment length %i" % a.get_alignment_length()
+ print("Alignment length %i" % a.get_alignment_length())
for record in a:
- print record.seq, record.name, record.id
+ print("%s %s %s" % (record.seq, record.name, record.id))
Note that this is not a full blown parser for all the information
in the FASTA output - for example, most of the header and all of the
@@ -121,16 +122,16 @@
else:
m = _extract_alignment_region(match_seq, match_tags)
assert len(q) == len(m)
- except AssertionError, err:
- print "Darn... amino acids vs nucleotide coordinates?"
- print tool
- print query_seq
- print query_tags
- print q, len(q)
- print match_seq
- print match_tags
- print m, len(m)
- print handle.name
+ except AssertionError as err:
+ print("Darn... amino acids vs nucleotide coordinates?")
+ print(tool)
+ print(query_seq)
+ print(query_tags)
+ print("%s %i" % (q, len(q)))
+ print(match_seq)
+ print(match_tags)
+ print("%s %i" % (m, len(m)))
+ print(handle.name)
raise err
assert alphabet is not None
@@ -141,9 +142,9 @@
alignment._annotations = {}
#Want to record both the query header tags, and the alignment tags.
- for key, value in header_tags.iteritems():
+ for key, value in header_tags.items():
alignment._annotations[key] = value
- for key, value in align_tags.iteritems():
+ for key, value in align_tags.items():
alignment._annotations[key] = value
#Query
@@ -356,7 +357,7 @@
if __name__ == "__main__":
- print "Running a quick self-test"
+ print("Running a quick self-test")
#http://emboss.sourceforge.net/docs/themes/alnformats/align.simple
simple_example = \
@@ -592,30 +593,29 @@
"""
- from StringIO import StringIO
+ from Bio._py3k import StringIO
alignments = list(FastaM10Iterator(StringIO(simple_example)))
assert len(alignments) == 4, len(alignments)
assert len(alignments[0]) == 2
for a in alignments:
- print "Alignment %i sequences of length %i" \
- % (len(a), a.get_alignment_length())
+ print("Alignment %i sequences of length %i" \
+ % (len(a), a.get_alignment_length()))
for r in a:
- print "%s %s %i" % (r.seq, r.id, r.annotations["original_length"])
- #print a.annotations
- print "Done"
+ print("%s %s %i" % (r.seq, r.id, r.annotations["original_length"]))
+ #print(a.annotations)
+ print("Done")
import os
path = "../../Tests/Fasta/"
- files = [f for f in os.listdir(path) if os.path.splitext(f)[-1] == ".m10"]
- files.sort()
+ files = sorted(f for f in os.listdir(path) if os.path.splitext(f)[-1] == ".m10")
for filename in files:
if os.path.splitext(filename)[-1] == ".m10":
- print
- print filename
- print "=" * len(filename)
+ print("")
+ print(filename)
+ print("=" * len(filename))
for i, a in enumerate(FastaM10Iterator(open(os.path.join(path, filename)))):
- print "#%i, %s" % (i+1, a)
+ print("#%i, %s" % (i+1, a))
for r in a:
if "-" in r.seq:
assert r.seq.alphabet.gap_char == "-"
diff -Nru python-biopython-1.62/Bio/AlignIO/Interfaces.py python-biopython-1.63/Bio/AlignIO/Interfaces.py
--- python-biopython-1.62/Bio/AlignIO/Interfaces.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/AlignIO/Interfaces.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,14 +1,17 @@
-# Copyright 2008-2010 by Peter Cock. All rights reserved.
+# Copyright 2008-2013 by Peter Cock. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
-"""
-AlignIO support module (not for general use).
+"""AlignIO support module (not for general use).
Unless you are writing a new parser or writer for Bio.AlignIO, you should not
use this module. It provides base classes to try and simplify things.
"""
+from __future__ import print_function
+
+import sys # for checking if Python 2
+
from Bio.Alphabet import single_letter_alphabet
@@ -43,7 +46,7 @@
# or if additional arguments are required. #
#####################################################
- def next(self):
+ def __next__(self):
"""Return the next alignment in the file.
This method should be replaced by any derived class to do something
@@ -55,19 +58,29 @@
# into MultipleSeqAlignment objects. #
#####################################################
+ if sys.version_info[0] < 3:
+ def next(self):
+ """Deprecated Python 2 style alias for Python 3 style __next__ method."""
+ import warnings
+ from Bio import BiopythonDeprecationWarning
+ warnings.warn("Please use next(my_iterator) instead of my_iterator.next(), "
+ "the .next() method is deprecated and will be removed in a "
+ "future release of Biopython.", BiopythonDeprecationWarning)
+ return self.__next__()
+
def __iter__(self):
"""Iterate over the entries as MultipleSeqAlignment objects.
Example usage for (concatenated) PHYLIP files:
- myFile = open("many.phy","r")
- for alignment in PhylipIterator(myFile):
- print "New alignment:"
- for record in alignment:
- print record.id
- print record.seq
- myFile.close()"""
- return iter(self.next, None)
+ with open("many.phy","r") as myFile:
+ for alignment in PhylipIterator(myFile):
+ print "New alignment:"
+ for record in alignment:
+ print record.id
+ print record.seq
+ """
+ return iter(self.__next__, None)
class AlignmentWriter(object):
diff -Nru python-biopython-1.62/Bio/AlignIO/NexusIO.py python-biopython-1.63/Bio/AlignIO/NexusIO.py
--- python-biopython-1.62/Bio/AlignIO/NexusIO.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/AlignIO/NexusIO.py 2013-12-05 14:10:43.000000000 +0000
@@ -3,8 +3,7 @@
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
-"""
-Bio.AlignIO support for the "nexus" file format.
+"""Bio.AlignIO support for the "nexus" file format.
You are expected to use this module via the Bio.AlignIO functions (or the
Bio.SeqIO functions if you want to work directly with the gapped sequences).
@@ -14,10 +13,12 @@
sequences as SeqRecord objects.
"""
+from __future__ import print_function
+
from Bio.SeqRecord import SeqRecord
from Bio.Nexus import Nexus
from Bio.Align import MultipleSeqAlignment
-from Interfaces import AlignmentWriter
+from .Interfaces import AlignmentWriter
from Bio import Alphabet
#You can get a couple of example files here:
@@ -75,7 +76,7 @@
"""
align_iter = iter(alignments) # Could have been a list
try:
- first_alignment = align_iter.next()
+ first_alignment = next(align_iter)
except StopIteration:
first_alignment = None
if first_alignment is None:
@@ -84,7 +85,7 @@
#Check there is only one alignment...
try:
- second_alignment = align_iter.next()
+ second_alignment = next(align_iter)
except StopIteration:
second_alignment = None
if second_alignment is not None:
@@ -136,10 +137,10 @@
raise ValueError("Need a DNA, RNA or Protein alphabet")
if __name__ == "__main__":
- from StringIO import StringIO
- print "Quick self test"
- print
- print "Repeated names without a TAXA block"
+ from Bio._py3k import StringIO
+ print("Quick self test")
+ print("")
+ print("Repeated names without a TAXA block")
handle = StringIO("""#NEXUS
[TITLE: NoName]
@@ -156,13 +157,13 @@
end;
""")
for a in NexusIterator(handle):
- print a
+ print(a)
for r in a:
- print repr(r.seq), r.name, r.id
- print "Done"
+ print("%r %s %s" % (r.seq, r.name, r.id))
+ print("Done")
- print
- print "Repeated names with a TAXA block"
+ print("")
+ print("Repeated names with a TAXA block")
handle = StringIO("""#NEXUS
[TITLE: NoName]
@@ -186,21 +187,21 @@
end;
""")
for a in NexusIterator(handle):
- print a
+ print(a)
for r in a:
- print repr(r.seq), r.name, r.id
- print "Done"
- print
- print "Reading an empty file"
+ print("%r %s %s" % (r.seq, r.name, r.id))
+ print("Done")
+ print("")
+ print("Reading an empty file")
assert 0 == len(list(NexusIterator(StringIO())))
- print "Done"
- print
- print "Writing..."
+ print("Done")
+ print("")
+ print("Writing...")
handle = StringIO()
NexusWriter(handle).write_file([a])
handle.seek(0)
- print handle.read()
+ print(handle.read())
handle = StringIO()
try:
diff -Nru python-biopython-1.62/Bio/AlignIO/PhylipIO.py python-biopython-1.63/Bio/AlignIO/PhylipIO.py
--- python-biopython-1.62/Bio/AlignIO/PhylipIO.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/AlignIO/PhylipIO.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,10 +1,9 @@
-# Copyright 2006-2011 by Peter Cock. All rights reserved.
+# Copyright 2006-2013 by Peter Cock. All rights reserved.
# Revisions copyright 2011 Brandon Invergo. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
-"""
-AlignIO support for the "phylip" format used in Joe Felsenstein's PHYLIP tools.
+"""AlignIO support for "phylip" format from Joe Felsenstein's PHYLIP tools.
You are expected to use this module via the Bio.AlignIO functions (or the
Bio.SeqIO functions if you want to work directly with the gapped sequences).
@@ -32,12 +31,16 @@
Biopython 1.58 or later treats dots/periods in the sequence as invalid, both
for reading and writing. Older versions did nothing special with a dot/period.
"""
+from __future__ import print_function
+
import string
+from Bio._py3k import range
+
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Align import MultipleSeqAlignment
-from Interfaces import AlignmentIterator, SequentialAlignmentWriter
+from .Interfaces import AlignmentIterator, SequentialAlignmentWriter
_PHYLIP_ID_WIDTH = 10
@@ -161,7 +164,7 @@
def _is_header(self, line):
line = line.strip()
- parts = filter(None, line.split())
+ parts = [x for x in line.split() if x]
if len(parts) != 2:
return False # First line should have two integers
try:
@@ -185,7 +188,7 @@
seq = line[self.id_width:].strip().replace(' ', '')
return seq_id, seq
- def next(self):
+ def __next__(self):
handle = self.handle
try:
@@ -199,7 +202,7 @@
if not line:
raise StopIteration
line = line.strip()
- parts = filter(None, line.split())
+ parts = [x for x in line.split() if x]
if len(parts) != 2:
raise ValueError("First line should have two integers")
try:
@@ -220,7 +223,7 @@
# By default, expects STRICT truncation / padding to 10 characters.
# Does not require any whitespace between name and seq.
- for i in xrange(number_of_seqs):
+ for i in range(number_of_seqs):
line = handle.readline().rstrip()
sequence_id, s = self._split_id(line)
ids.append(sequence_id)
@@ -245,7 +248,7 @@
break
#print "New block..."
- for i in xrange(number_of_seqs):
+ for i in range(number_of_seqs):
s = line.strip().replace(" ", "")
if "." in s:
raise ValueError("PHYLIP format no longer allows dots in sequence")
@@ -370,7 +373,7 @@
the next. According to the PHYLIP documentation for input file formatting,
newlines and spaces may optionally be entered at any point in the sequences.
"""
- def next(self):
+ def __next__(self):
handle = self.handle
try:
@@ -384,7 +387,7 @@
if not line:
raise StopIteration
line = line.strip()
- parts = filter(None, line.split())
+ parts = [x for x in line.split() if x]
if len(parts) != 2:
raise ValueError("First line should have two integers")
try:
@@ -405,7 +408,7 @@
# By default, expects STRICT truncation / padding to 10 characters.
# Does not require any whitespace between name and seq.
- for i in xrange(number_of_seqs):
+ for i in range(number_of_seqs):
line = handle.readline().rstrip()
sequence_id, s = self._split_id(line)
ids.append(sequence_id)
@@ -439,7 +442,7 @@
if __name__ == "__main__":
- print "Running short mini-test"
+ print("Running short mini-test")
phylip_text = """ 8 286
V_Harveyi_ --MKNWIKVA VAAIA--LSA A--------- ---------T VQAATEVKVG
@@ -497,13 +500,13 @@
LREALNKAFA EMRADGTYEK LAKKYFDFDV YGG---
"""
- from cStringIO import StringIO
+ from Bio._py3k import StringIO
handle = StringIO(phylip_text)
count = 0
for alignment in PhylipIterator(handle):
for record in alignment:
count = count+1
- print record.id
+ print(record.id)
#print str(record.seq)
assert count == 8
@@ -600,9 +603,9 @@
list5 = list(PhylipIterator(handle))
assert len(list5) == 1
assert len(list5[0]) == 5
- print "That should have failed..."
+ print("That should have failed...")
except ValueError:
- print "Evil multiline non-interlaced example failed as expected"
+ print("Evil multiline non-interlaced example failed as expected")
handle.close()
handle = StringIO(phylip_text5a)
@@ -611,16 +614,16 @@
assert len(list5) == 1
assert len(list4[0]) == 5
- print "Concatenation"
+ print("Concatenation")
handle = StringIO(phylip_text4 + "\n" + phylip_text4)
assert len(list(PhylipIterator(handle))) == 2
handle = StringIO(phylip_text3 + "\n" + phylip_text4 + "\n\n\n" + phylip_text)
assert len(list(PhylipIterator(handle))) == 3
- print "OK"
+ print("OK")
- print "Checking write/read"
+ print("Checking write/read")
handle = StringIO()
PhylipWriter(handle).write_file(list5)
handle.seek(0)
@@ -631,4 +634,4 @@
for r1, r2 in zip(a1, a2):
assert r1.id == r2.id
assert str(r1.seq) == str(r2.seq)
- print "Done"
+ print("Done")
diff -Nru python-biopython-1.62/Bio/AlignIO/StockholmIO.py python-biopython-1.63/Bio/AlignIO/StockholmIO.py
--- python-biopython-1.62/Bio/AlignIO/StockholmIO.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/AlignIO/StockholmIO.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,9 +1,9 @@
-# Copyright 2006-2010 by Peter Cock. All rights reserved.
+# Copyright 2006-2013 by Peter Cock. All rights reserved.
+#
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
-"""
-Bio.AlignIO support for the "stockholm" format (used in the PFAM database).
+"""Bio.AlignIO support for "stockholm" format (used in the PFAM database).
You are expected to use this module via the Bio.AlignIO functions (or the
Bio.SeqIO functions if you want to work directly with the gapped sequences).
@@ -29,12 +29,12 @@
>>> from Bio import AlignIO
>>> align = AlignIO.read("Stockholm/simple.sth", "stockholm")
- >>> print align
+ >>> print(align)
SingleLetterAlphabet() alignment with 2 rows and 104 columns
UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-G...UGU AP001509.1
AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-C...GAU AE007476.1
>>> for record in align:
- ... print record.id, len(record)
+ ... print("%s %i" % (record.id, len(record)))
AP001509.1 104
AE007476.1 104
@@ -47,7 +47,7 @@
>>> from Bio.Alphabet import generic_rna
>>> align = AlignIO.read("Stockholm/simple.sth", "stockholm",
... alphabet=generic_rna)
- >>> print align
+ >>> print(align)
RNAAlphabet() alignment with 2 rows and 104 columns
UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-G...UGU AP001509.1
AAAAUUGAAUAUCGUUUUACUUGUUUAU-GUCGUGAAU-UGG-C...GAU AE007476.1
@@ -57,9 +57,9 @@
strings, with one character for each letter in the associated sequence:
>>> for record in align:
- ... print record.id
- ... print record.seq
- ... print record.letter_annotations['secondary_structure']
+ ... print(record.id)
+ ... print(record.seq)
+ ... print(record.letter_annotations['secondary_structure'])
AP001509.1
UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-GAUGAGGGUCUCUAC-AGGUA-CCGUAAA-UACCUAGCUACGAAAAGAAUGCAGUUAAUGU
-----------------<<<<<<<<---..<<-<<-------->>->>..---------<<<<<--------->>>>>--->>>>>>>>---------------
@@ -71,7 +71,7 @@
dictionary. You can output this alignment in many different file formats
using Bio.AlignIO.write(), or the MultipleSeqAlignment object's format method:
- >>> print align.format("fasta")
+ >>> print(align.format("fasta"))
>AP001509.1
UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-GAUGAGGGUCUCUAC-A
GGUA-CCGUAAA-UACCUAGCUACGAAAAGAAUGCAGUUAAUGU
@@ -83,7 +83,7 @@
Most output formats won't be able to hold the annotation possible in a
Stockholm file:
- >>> print align.format("stockholm")
+ >>> print(align.format("stockholm"))
# STOCKHOLM 1.0
#=GF SQ 2
AP001509.1 UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-GAUGAGGGUCUCUAC-AGGUA-CCGUAAA-UACCUAGCUACGAAAAGAAUGCAGUUAAUGU
@@ -110,9 +110,9 @@
>>> from Bio.Alphabet import generic_rna
>>> for record in SeqIO.parse("Stockholm/simple.sth", "stockholm",
... alphabet=generic_rna):
- ... print record.id
- ... print record.seq
- ... print record.letter_annotations['secondary_structure']
+ ... print(record.id)
+ ... print(record.seq)
+ ... print(record.letter_annotations['secondary_structure'])
AP001509.1
UUAAUCGAGCUCAACACUCUUCGUAUAUCCUC-UCAAUAUGG-GAUGAGGGUCUCUAC-AGGUA-CCGUAAA-UACCUAGCUACGAAAAGAAUGCAGUUAAUGU
-----------------<<<<<<<<---..<<-<<-------->>->>..---------<<<<<--------->>>>>--->>>>>>>>---------------
@@ -124,16 +124,18 @@
secondary structure string here, are also sliced:
>>> sub_record = record[10:20]
- >>> print sub_record.seq
+ >>> print(sub_record.seq)
AUCGUUUUAC
- >>> print sub_record.letter_annotations['secondary_structure']
+ >>> print(sub_record.letter_annotations['secondary_structure'])
-------<<<
"""
+from __future__ import print_function
+
__docformat__ = "epytext en" # not just plaintext
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Align import MultipleSeqAlignment
-from Interfaces import AlignmentIterator, SequentialAlignmentWriter
+from .Interfaces import AlignmentIterator, SequentialAlignmentWriter
class StockholmWriter(SequentialAlignmentWriter):
@@ -238,7 +240,7 @@
% (seq_name, self.clean(xref)))
#GS = other per sequence annotation
- for key, value in record.annotations.iteritems():
+ for key, value in record.annotations.items():
if key in self.pfam_gs_mapping:
data = self.clean(str(value))
if data:
@@ -252,7 +254,7 @@
pass
#GR = per row per column sequence annotation
- for key, value in record.letter_annotations.iteritems():
+ for key, value in record.letter_annotations.items():
if key in self.pfam_gr_mapping and len(str(value)) == len(record.seq):
data = self.clean(str(value))
if data:
@@ -310,7 +312,7 @@
"OC": "organism_classification",
"LO": "look"}
- def next(self):
+ def __next__(self):
try:
line = self._header
del self._header
@@ -321,8 +323,6 @@
raise StopIteration
if not line.strip() == '# STOCKHOLM 1.0':
raise ValueError("Did not find STOCKHOLM header")
- #import sys
- #print >> sys.stderr, 'Warning file does not start with STOCKHOLM 1.0'
# Note: If this file follows the PFAM conventions, there should be
# a line containing the number of sequences, e.g. "#=GF SQ 67"
@@ -335,7 +335,7 @@
gr = {}
gf = {}
passed_end_alignment = False
- while 1:
+ while True:
line = self.handle.readline()
if not line:
break # end of file
@@ -424,7 +424,7 @@
raise ValueError("Found %i records in this alignment, told to expect %i"
% (len(ids), self.records_per_alignment))
- alignment_length = len(seqs.values()[0])
+ alignment_length = len(list(seqs.values())[0])
records = [] # Alignment obj will put them all in a list anyway
for id in ids:
seq = seqs[id]
@@ -456,17 +456,17 @@
raise StopIteration
def _identifier_split(self, identifier):
- """Returns (name,start,end) string tuple from an identier."""
+ """Returns (name, start, end) string tuple from an identier."""
if '/' in identifier:
name, start_end = identifier.rsplit("/", 1)
if start_end.count("-") == 1:
try:
- start, end = map(int, start_end.split("-"))
- return (name, start, end)
+ start, end = start_end.split("-")
+ return name, int(start), int(end)
except ValueError:
# Non-integers after final '/' - fall through
pass
- return (identifier, None, None)
+ return identifier, None, None
def _get_meta_data(self, identifier, meta_dict):
"""Takes an itentifier and returns dict of all meta-data matching it.
@@ -537,3 +537,4 @@
if __name__ == "__main__":
from Bio._utils import run_doctest
run_doctest()
+
diff -Nru python-biopython-1.62/Bio/AlignIO/__init__.py python-biopython-1.63/Bio/AlignIO/__init__.py
--- python-biopython-1.62/Bio/AlignIO/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/AlignIO/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -27,7 +27,7 @@
>>> from Bio import AlignIO
>>> align = AlignIO.read("Phylip/interlaced.phy", "phylip")
- >>> print align
+ >>> print(align)
SingleLetterAlphabet() alignment with 3 rows and 384 columns
-----MKVILLFVLAVFTVFVSS---------------RGIPPE...I-- CYS1_DICDI
MAHARVLLLALAVLATAAVAVASSSSFADSNPIRPVTDRAASTL...VAA ALEU_HORVU
@@ -41,7 +41,7 @@
>>> from Bio import AlignIO
>>> alignments = list(AlignIO.parse("Emboss/needle.txt", "emboss"))
- >>> print alignments[2]
+ >>> print(alignments[2])
SingleLetterAlphabet() alignment with 2 rows and 120 columns
-KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKER...--- ref_rec
LHIVVVDDDPGTCVYIESVFAELGHTCKSFVRPEAAEEYILTHP...HKE gi|94967506|receiver
@@ -65,9 +65,8 @@
from Bio import AlignIO
alignments = ...
- handle = open("example.faa", "w")
- count = SeqIO.write(alignments, handle, "fasta")
- handle.close()
+ with open("example.faa", "w") as handle:
+ count = SeqIO.write(alignments, handle, "fasta")
In general, you are expected to call this function once (with all your
alignments) and then close the file handle. However, for file formats
@@ -120,8 +119,9 @@
same length.
"""
-# For using with statement in Python 2.5 or Jython
-from __future__ import with_statement
+
+from __future__ import print_function
+from Bio._py3k import basestring
__docformat__ = "epytext en" # not just plaintext
@@ -144,12 +144,12 @@
from Bio.Alphabet import Alphabet, AlphabetEncoder, _get_base_alphabet
from Bio.File import as_handle
-import StockholmIO
-import ClustalIO
-import NexusIO
-import PhylipIO
-import EmbossIO
-import FastaIO
+from . import StockholmIO
+from . import ClustalIO
+from . import NexusIO
+from . import PhylipIO
+from . import EmbossIO
+from . import FastaIO
#Convention for format names is "mainname-subtype" in lower case.
#Please use the same names as BioPerl and EMBOSS where possible.
@@ -313,7 +313,7 @@
>>> filename = "Emboss/needle.txt"
>>> format = "emboss"
>>> for alignment in AlignIO.parse(filename, format):
- ... print "Alignment of length", alignment.get_alignment_length()
+ ... print("Alignment of length %i" % alignment.get_alignment_length())
Alignment of length 124
Alignment of length 119
Alignment of length 120
@@ -392,7 +392,7 @@
>>> filename = "Clustalw/protein.aln"
>>> format = "clustal"
>>> alignment = AlignIO.read(filename, format)
- >>> print "Alignment of length", alignment.get_alignment_length()
+ >>> print("Alignment of length %i" % alignment.get_alignment_length())
Alignment of length 411
If however you want the first alignment from a file containing
@@ -411,8 +411,8 @@
>>> from Bio import AlignIO
>>> filename = "Emboss/needle.txt"
>>> format = "emboss"
- >>> alignment = AlignIO.parse(filename, format).next()
- >>> print "First alignment has length", alignment.get_alignment_length()
+ >>> alignment = next(AlignIO.parse(filename, format))
+ >>> print("First alignment has length %i" % alignment.get_alignment_length())
First alignment has length 124
You must use the Bio.AlignIO.parse() function if you want to read multiple
@@ -420,13 +420,13 @@
"""
iterator = parse(handle, format, seq_count, alphabet)
try:
- first = iterator.next()
+ first = next(iterator)
except StopIteration:
first = None
if first is None:
raise ValueError("No records found in handle")
try:
- second = iterator.next()
+ second = next(iterator)
except StopIteration:
second = None
if second is not None:
@@ -466,3 +466,4 @@
if __name__ == "__main__":
from Bio._utils import run_doctest
run_doctest()
+
diff -Nru python-biopython-1.62/Bio/Application/__init__.py python-biopython-1.63/Bio/Application/__init__.py
--- python-biopython-1.62/Bio/Application/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Application/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -19,9 +19,12 @@
The finished command line strings are then normally invoked via the built-in
Python module subprocess.
"""
+from __future__ import print_function
+from Bio._py3k import basestring
+
import os
+import platform
import sys
-import StringIO
import subprocess
import re
@@ -31,11 +34,13 @@
#Use this regular expression to test the property names are going to
#be valid as Python properties or arguments
-_re_prop_name = re.compile(r"[a-zA-Z][a-zA-Z0-9_]*")
+_re_prop_name = re.compile(r"^[a-zA-Z][a-zA-Z0-9_]*$")
assert _re_prop_name.match("t")
assert _re_prop_name.match("test")
assert _re_prop_name.match("_test") is None # we don't want private names
assert _re_prop_name.match("-test") is None
+assert _re_prop_name.match("any-hyphen") is None
+assert _re_prop_name.match("underscore_ok")
assert _re_prop_name.match("test_name")
assert _re_prop_name.match("test2")
#These are reserved names in Python itself,
@@ -60,7 +65,7 @@
>>> err = ApplicationError(-11, "helloworld", "", "Some error text")
>>> err.returncode, err.cmd, err.stdout, err.stderr
(-11, 'helloworld', '', 'Some error text')
- >>> print err
+ >>> print(err)
Command 'helloworld' returned non-zero exit status -11, 'Some error text'
"""
@@ -135,7 +140,7 @@
>>> water_cmd.asequence = "asis:ACCCGGGCGCGGT"
>>> water_cmd.bsequence = "asis:ACCCGAGCGCGGT"
>>> water_cmd.outfile = "temp_water.txt"
- >>> print water_cmd
+ >>> print(water_cmd)
water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
>>> water_cmd
WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)
@@ -157,7 +162,7 @@
... asequence="asis:ACCCGGGCGCGGT",
... bsequence="asis:ACCCGAGCGCGGT",
... outfile="temp_water.txt")
- >>> print water_cmd
+ >>> print(water_cmd)
"C:\Program Files\EMBOSS\water.exe" -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
Notice that since the path name includes a space it has automatically
@@ -238,7 +243,7 @@
"argument value required." % p.names[0]
prop = property(getter(name), setter(name), deleter(name), doc)
setattr(self.__class__, name, prop) # magic!
- for key, value in kwargs.iteritems():
+ for key, value in kwargs.items():
self.set_parameter(key, value)
def _validate(self):
@@ -265,7 +270,7 @@
>>> cline.asequence = "asis:ACCCGGGCGCGGT"
>>> cline.bsequence = "asis:ACCCGAGCGCGGT"
>>> cline.outfile = "temp_water.txt"
- >>> print cline
+ >>> print(cline)
water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
>>> str(cline)
'water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5'
@@ -287,7 +292,7 @@
>>> cline.asequence = "asis:ACCCGGGCGCGGT"
>>> cline.bsequence = "asis:ACCCGAGCGCGGT"
>>> cline.outfile = "temp_water.txt"
- >>> print cline
+ >>> print(cline)
water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
>>> cline
WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)
@@ -387,7 +392,7 @@
Traceback (most recent call last):
...
ValueError: Option name csequence was not found.
- >>> print cline
+ >>> print(cline)
water -stdout -asequence=a.fasta -bsequence=b.fasta -gapopen=10 -gapextend=0.5
This workaround uses a whitelist of object attributes, and sets the
@@ -468,11 +473,23 @@
#
#Using universal newlines is important on Python 3, this
#gives unicode handles rather than bytes handles.
+
+ #Windows 7 and 8 want shell = True
+ #platform is easier to understand than sys to determine
+ #windows version
+ if sys.platform != "win32":
+ use_shell = True
+ else:
+ win_ver = platform.win32_ver()[0]
+ if win_ver in ["7", "8"]:
+ use_shell = True
+ else:
+ use_shell = False
child_process = subprocess.Popen(str(self), stdin=subprocess.PIPE,
stdout=stdout_arg, stderr=stderr_arg,
universal_newlines=True,
cwd=cwd, env=env,
- shell=(sys.platform!="win32"))
+ shell=use_shell)
#Use .communicate as can get deadlocks with .wait(), see Bug 2804
stdout_str, stderr_str = child_process.communicate(stdin)
if not stdout:
@@ -676,9 +693,9 @@
Note this will not add quotes if they are already included:
- >>> print _escape_filename('example with spaces')
+ >>> print(_escape_filename('example with spaces'))
"example with spaces"
- >>> print _escape_filename('"example with spaces"')
+ >>> print(_escape_filename('"example with spaces"'))
"example with spaces"
"""
#Is adding the following helpful
@@ -712,3 +729,4 @@
if __name__ == "__main__":
#Run the doctests
_test()
+
diff -Nru python-biopython-1.62/Bio/Blast/Applications.py python-biopython-1.63/Bio/Blast/Applications.py
--- python-biopython-1.62/Bio/Blast/Applications.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Blast/Applications.py 2013-12-05 14:10:43.000000000 +0000
@@ -32,6 +32,8 @@
BMC Bioinformatics 2009, 10:421
doi:10.1186/1471-2105-10-421
"""
+from __future__ import print_function
+
from Bio import BiopythonDeprecationWarning
from Bio.Application import _Option, AbstractCommandline, _Switch
@@ -206,7 +208,7 @@
... database="nr", expectation=0.001)
>>> cline
BlastallCommandline(cmd='blastall', database='nr', infile='m_cold.fasta', expectation=0.001, program='blastx')
- >>> print cline
+ >>> print(cline)
blastall -d nr -i m_cold.fasta -e 0.001 -p blastx
You would typically run the command line with cline() or via the Python
@@ -310,7 +312,7 @@
>>> cline = BlastpgpCommandline(help=True)
>>> cline
BlastpgpCommandline(cmd='blastpgp', help=True)
- >>> print cline
+ >>> print(cline)
blastpgp --help
You would typically run the command line with cline() or via the Python
@@ -389,7 +391,7 @@
>>> cline = RpsBlastCommandline(help=True)
>>> cline
RpsBlastCommandline(cmd='rpsblast', help=True)
- >>> print cline
+ >>> print(cline)
rpsblast --help
You would typically run the command line with cline() or via the Python
@@ -455,15 +457,15 @@
"(differs from classic BLAST which used 7 for XML).",
equate=False),
#TODO - Document and test the column options
- _Switch(["-show_gis","show_gis"],
+ _Switch(["-show_gis", "show_gis"],
"Show NCBI GIs in deflines?"),
- _Option(["-num_descriptions","num_descriptions"],
+ _Option(["-num_descriptions", "num_descriptions"],
"""Number of database sequences to show one-line descriptions for.
Integer argument (at least zero). Default is 500.
See also num_alignments.""",
equate=False),
- _Option(["-num_alignments","num_alignments"],
+ _Option(["-num_alignments", "num_alignments"],
"""Number of database sequences to show num_alignments for.
Integer argument (at least zero). Default is 200.
@@ -491,7 +493,7 @@
for b in incompatibles[a]:
if self._get_parameter(b):
raise ValueError("Options %s and %s are incompatible."
- % (a,b))
+ % (a, b))
class _NcbiblastCommandline(_NcbibaseblastCommandline):
@@ -518,7 +520,7 @@
_Option(["-evalue", "evalue"],
"Expectation value cutoff.",
equate=False),
- _Option(["-word_size","word_size"],
+ _Option(["-word_size", "word_size"],
"""Word size for wordfinder algorithm.
Integer. Minimum 2.""",
@@ -697,7 +699,7 @@
def _validate(self):
incompatibles = {"subject_loc":["db", "gilist", "negative_gilist", "seqidlist", "remote"],
- "culling_limit":["best_hit_overhang","best_hit_score_edge"],
+ "culling_limit":["best_hit_overhang", "best_hit_score_edge"],
"subject":["db", "gilist", "negative_gilist", "seqidlist"]}
self._validate_incompatibilities(incompatibles)
_NcbiblastCommandline._validate(self)
@@ -757,7 +759,7 @@
... evalue=0.001, remote=True, ungapped=True)
>>> cline
NcbiblastpCommandline(cmd='blastp', query='rosemary.pro', db='nr', evalue=0.001, remote=True, ungapped=True)
- >>> print cline
+ >>> print(cline)
blastp -query rosemary.pro -db nr -evalue 0.001 -remote -ungapped
You would typically run the command line with cline() or via the Python
@@ -820,7 +822,7 @@
... evalue=0.001, out="m_cold.xml", outfmt=5)
>>> cline
NcbiblastnCommandline(cmd='blastn', out='m_cold.xml', outfmt=5, query='m_cold.fasta', db='nt', evalue=0.001, strand='plus')
- >>> print cline
+ >>> print(cline)
blastn -out m_cold.xml -outfmt 5 -query m_cold.fasta -db nt -evalue 0.001 -strand plus
You would typically run the command line with cline() or via the Python
@@ -889,7 +891,7 @@
Allowed values: 'coding', 'coding_and_optimal' or 'optimal'
Requires: template_length.""",
- checker_function=lambda value : value in ['coding', 'coding_and_optimal','optimal'],
+ checker_function=lambda value : value in ['coding', 'coding_and_optimal', 'optimal'],
equate=False),
_Option(["-template_length", "template_length"],
"""Discontiguous MegaBLAST template length (integer).
@@ -897,7 +899,7 @@
Allowed values: 16, 18, 21
Requires: template_type.""",
- checker_function=lambda value : value in [16,18,21,'16','18','21'],
+ checker_function=lambda value : value in [16, 18, 21, '16', '18', '21'],
equate=False),
#Extension options:
_Switch(["-no_greedy", "no_greedy"],
@@ -937,7 +939,7 @@
>>> cline = NcbiblastxCommandline(query="m_cold.fasta", db="nr", evalue=0.001)
>>> cline
NcbiblastxCommandline(cmd='blastx', query='m_cold.fasta', db='nr', evalue=0.001)
- >>> print cline
+ >>> print(cline)
blastx -query m_cold.fasta -db nr -evalue 0.001
You would typically run the command line with cline() or via the Python
@@ -1018,7 +1020,7 @@
>>> cline = NcbitblastnCommandline(help=True)
>>> cline
NcbitblastnCommandline(cmd='tblastn', help=True)
- >>> print cline
+ >>> print(cline)
tblastn -help
You would typically run the command line with cline() or via the Python
@@ -1098,7 +1100,7 @@
>>> cline = NcbitblastxCommandline(help=True)
>>> cline
NcbitblastxCommandline(cmd='tblastx', help=True)
- >>> print cline
+ >>> print(cline)
tblastx -help
You would typically run the command line with cline() or via the Python
@@ -1161,7 +1163,7 @@
>>> cline = NcbipsiblastCommandline(help=True)
>>> cline
NcbipsiblastCommandline(cmd='psiblast', help=True)
- >>> print cline
+ >>> print(cline)
psiblast -help
You would typically run the command line with cline() or via the Python
@@ -1266,10 +1268,10 @@
_Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
def _validate(self):
- incompatibles = {"num_iterations":["remote"],
- "in_msa":["in_pssm", "query"],
- "in_pssm":["in_msa","query","phi_pattern"],
- "ignore_msa_master":["msa_master_idx", "in_pssm",
+ incompatibles = {"num_iterations": ["remote"],
+ "in_msa": ["in_pssm", "query"],
+ "in_pssm": ["in_msa", "query", "phi_pattern"],
+ "ignore_msa_master": ["msa_master_idx", "in_pssm",
"query", "query_loc", "phi_pattern"],
}
self._validate_incompatibilities(incompatibles)
@@ -1287,7 +1289,7 @@
>>> cline = NcbirpsblastCommandline(help=True)
>>> cline
NcbirpsblastCommandline(cmd='rpsblast', help=True)
- >>> print cline
+ >>> print(cline)
rpsblast -help
You would typically run the command line with cline() or via the Python
@@ -1346,7 +1348,7 @@
_NcbiblastCommandline.__init__(self, cmd, **kwargs)
def _validate(self):
- incompatibles = {"culling_limit":["best_hit_overhang","best_hit_score_edge"]}
+ incompatibles = {"culling_limit":["best_hit_overhang", "best_hit_score_edge"]}
self._validate_incompatibilities(incompatibles)
_NcbiblastCommandline._validate(self)
@@ -1362,7 +1364,7 @@
>>> cline = NcbirpstblastnCommandline(help=True)
>>> cline
NcbirpstblastnCommandline(cmd='rpstblastn', help=True)
- >>> print cline
+ >>> print(cline)
rpstblastn -help
You would typically run the command line with cline() or via the Python
@@ -1413,7 +1415,7 @@
>>> cline = NcbiblastformatterCommandline(archive="example.asn", outfmt=5, out="example.xml")
>>> cline
NcbiblastformatterCommandline(cmd='blast_formatter', out='example.xml', outfmt=5, archive='example.asn')
- >>> print cline
+ >>> print(cline)
blast_formatter -out example.xml -outfmt 5 -archive example.asn
You would typically run the command line with cline() or via the Python
@@ -1458,3 +1460,4 @@
if __name__ == "__main__":
#Run the doctests
_test()
+
diff -Nru python-biopython-1.62/Bio/Blast/NCBIStandalone.py python-biopython-1.63/Bio/Blast/NCBIStandalone.py
--- python-biopython-1.62/Bio/Blast/NCBIStandalone.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Blast/NCBIStandalone.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,7 +5,7 @@
# Patches by Mike Poidinger to support multiple databases.
# Updated by Peter Cock in 2007 to do a better job on BLAST 2.2.15
-"""Code for calling standalone BLAST and parsing plain text output (OBSOLETE).
+"""Code for calling standalone BLAST and parsing plain text output (DEPRECATED).
Rather than parsing the human readable plain text BLAST output (which seems to
change with every update to BLAST), we and the NBCI recommend you parse the
@@ -39,9 +39,9 @@
_ParametersConsumer Consumes parameters information.
Functions:
-blastall Execute blastall (OBSOLETE).
-blastpgp Execute blastpgp (OBSOLETE).
-rpsblast Execute rpsblast (OBSOLETE).
+blastall Execute blastall.
+blastpgp Execute blastpgp.
+rpsblast Execute rpsblast.
For calling the BLAST command line tools, we encourage you to use the
command line wrappers in Bio.Blast.Applications - the three functions
@@ -49,14 +49,16 @@
are likely to be deprecated and then removed in future releases.
"""
-import warnings
-warnings.warn("The plain text parser in this module still works at the time of writing, but is considered obsolete and updating it to cope with the latest versions of BLAST is not a priority for us.", PendingDeprecationWarning)
+from __future__ import print_function
from Bio import BiopythonDeprecationWarning
+import warnings
+warnings.warn("This module has been deprecated. Consider Bio.SearchIO for "
+ "parsing BLAST output instead.", BiopythonDeprecationWarning)
import os
import re
-import StringIO
+from Bio._py3k import StringIO
from Bio import File
from Bio.ParserSupport import *
@@ -177,7 +179,7 @@
consumer.reference, start='Reference'):
# References are normally multiline terminated by a blank line
# (or, based on the old code, the RID line)
- while 1:
+ while True:
line = uhandle.readline()
if is_blank_line(line):
consumer.noevent(line)
@@ -343,7 +345,7 @@
contains='No hits found')
try:
read_and_call_while(uhandle, consumer.noevent, blank=1)
- except ValueError, err:
+ except ValueError as err:
if str(err) != "Unexpected end of stream.":
raise err
@@ -435,7 +437,7 @@
self._scan_alignment_header(uhandle, consumer)
# Scan a bunch of score/alignment pairs.
- while 1:
+ while True:
if self._eof(uhandle):
#Shouldn't have issued that _scan_alignment_header event...
break
@@ -457,7 +459,7 @@
# ...
# Length=428
read_and_call(uhandle, consumer.title, start='>')
- while 1:
+ while True:
line = safe_readline(uhandle)
if line.lstrip().startswith('Length =') \
or line.lstrip().startswith('Length='):
@@ -505,7 +507,7 @@
# Sbjct: 70 PNIIQLKD 77
#
- while 1:
+ while True:
# Blastn adds an extra line filled with spaces before Query
attempt_read_and_call(uhandle, consumer.noevent, start=' ')
read_and_call(uhandle, consumer.query, start='Query')
@@ -513,7 +515,7 @@
read_and_call(uhandle, consumer.sbjct, start='Sbjct')
try:
read_and_call_while(uhandle, consumer.noevent, blank=1)
- except ValueError, err:
+ except ValueError as err:
if str(err) != "Unexpected end of stream.":
raise err
# End of File (well, it looks like it with recent versions
@@ -527,7 +529,7 @@
def _scan_masterslave_alignment(self, uhandle, consumer):
consumer.start_alignment()
- while 1:
+ while True:
line = safe_readline(uhandle)
# Check to see whether I'm finished reading the alignment.
# This is indicated by 1) database section, 2) next psi-blast
@@ -551,7 +553,7 @@
def _eof(self, uhandle):
try:
line = safe_peekline(uhandle)
- except ValueError, err:
+ except ValueError as err:
if str(err) != "Unexpected end of stream.":
raise err
line = ""
@@ -644,7 +646,7 @@
# file.
try:
read_and_call_while(uhandle, consumer.noevent, blank=1)
- except ValueError, x:
+ except ValueError as x:
if str(x) != "Unexpected end of stream.":
raise
consumer.end_database_report()
@@ -1003,7 +1005,7 @@
def length(self, line):
#e.g. "Length = 81" or more recently, "Length=428"
- parts = line.replace(" ","").split("=")
+ parts = line.replace(" ", "").split("=")
assert len(parts)==2, "Unrecognised format length line"
self._alignment.length = parts[1]
self._alignment.length = _safe_int(self._alignment.length)
@@ -1310,15 +1312,13 @@
self._dr.num_sequences_in_database.append(_safe_int(sequences))
def ka_params(self, line):
- x = line.split()
- self._dr.ka_params = map(_safe_float, x)
+ self._dr.ka_params = [_safe_float(x) for x in line.split()]
def gapped(self, line):
self._dr.gapped = 1
def ka_params_gap(self, line):
- x = line.split()
- self._dr.ka_params_gap = map(_safe_float, x)
+ self._dr.ka_params_gap = [_safe_float(x) for x in line.split()]
def end_database_report(self):
pass
@@ -1332,9 +1332,8 @@
self._params.matrix = line[8:].rstrip()
def gap_penalties(self, line):
- x = _get_cols(
- line, (3, 5), ncols=6, expected={2:"Existence:", 4:"Extension:"})
- self._params.gap_penalties = map(_safe_float, x)
+ self._params.gap_penalties = [_safe_float(x) for x in _get_cols(
+ line, (3, 5), ncols=6, expected={2:"Existence:", 4:"Extension:"})]
def num_hits(self, line):
if '1st pass' in line:
@@ -1637,7 +1636,7 @@
self._parser = parser
self._header = []
- def next(self):
+ def __next__(self):
"""next(self) -> object
Return the next Blast record from the file. If no more records,
@@ -1646,7 +1645,7 @@
"""
lines = []
query = False
- while 1:
+ while True:
line = self._uhandle.readline()
if not line:
break
@@ -1682,11 +1681,21 @@
data = ''.join(lines)
if self._parser is not None:
- return self._parser.parse(StringIO.StringIO(data))
+ return self._parser.parse(StringIO(data))
return data
+ if sys.version_info[0] < 3:
+ def next(self):
+ """Deprecated Python 2 style alias for Python 3 style __next__ method."""
+ import warnings
+ from Bio import BiopythonDeprecationWarning
+ warnings.warn("Please use next(my_iterator) instead of my_iterator.next(), "
+ "the .next() method is deprecated and will be removed in a "
+ "future release of Biopython.", BiopythonDeprecationWarning)
+ return self.__next__()
+
def __iter__(self):
- return iter(self.next, None)
+ return iter(self.__next__, None)
def blastall(blastcmd, program, database, infile, align_view='7', **keywds):
@@ -1753,50 +1762,49 @@
_security_check_parameters(keywds)
att2param = {
- 'matrix' : '-M',
- 'gap_open' : '-G',
- 'gap_extend' : '-E',
- 'nuc_match' : '-r',
- 'nuc_mismatch' : '-q',
- 'query_genetic_code' : '-Q',
- 'db_genetic_code' : '-D',
-
- 'gapped' : '-g',
- 'expectation' : '-e',
- 'wordsize' : '-W',
- 'strands' : '-S',
- 'keep_hits' : '-K',
- 'xdrop' : '-X',
- 'hit_extend' : '-f',
- 'region_length' : '-L',
- 'db_length' : '-z',
- 'search_length' : '-Y',
-
- 'program' : '-p',
- 'database' : '-d',
- 'infile' : '-i',
- 'filter' : '-F',
- 'believe_query' : '-J',
- 'restrict_gi' : '-l',
- 'nprocessors' : '-a',
- 'oldengine' : '-V',
-
- 'html' : '-T',
- 'descriptions' : '-v',
- 'alignments' : '-b',
- 'align_view' : '-m',
- 'show_gi' : '-I',
- 'seqalign_file' : '-O',
- 'outfile' : '-o',
+ 'matrix': '-M',
+ 'gap_open': '-G',
+ 'gap_extend': '-E',
+ 'nuc_match': '-r',
+ 'nuc_mismatch': '-q',
+ 'query_genetic_code': '-Q',
+ 'db_genetic_code': '-D',
+
+ 'gapped': '-g',
+ 'expectation': '-e',
+ 'wordsize': '-W',
+ 'strands': '-S',
+ 'keep_hits': '-K',
+ 'xdrop': '-X',
+ 'hit_extend': '-f',
+ 'region_length': '-L',
+ 'db_length': '-z',
+ 'search_length': '-Y',
+
+ 'program': '-p',
+ 'database': '-d',
+ 'infile': '-i',
+ 'filter': '-F',
+ 'believe_query': '-J',
+ 'restrict_gi': '-l',
+ 'nprocessors': '-a',
+ 'oldengine': '-V',
+
+ 'html': '-T',
+ 'descriptions': '-v',
+ 'alignments': '-b',
+ 'align_view': '-m',
+ 'show_gi': '-I',
+ 'seqalign_file': '-O',
+ 'outfile': '-o',
}
- warnings.warn("This function is deprecated; you are encouraged to the command line wrapper Bio.Blast.Applications.BlastallCommandline instead.", BiopythonDeprecationWarning)
- from Applications import BlastallCommandline
+ from .Applications import BlastallCommandline
cline = BlastallCommandline(blastcmd)
cline.set_parameter(att2param['program'], program)
cline.set_parameter(att2param['database'], database)
cline.set_parameter(att2param['infile'], infile)
cline.set_parameter(att2param['align_view'], str(align_view))
- for key, value in keywds.iteritems():
+ for key, value in keywds.items():
cline.set_parameter(att2param[key], str(value))
return _invoke_blast(cline)
@@ -1872,61 +1880,59 @@
align_infile Input alignment file for PSI-BLAST restart.
"""
-
- warnings.warn("This function is deprecated; you are encouraged to the command line wrapper Bio.Blast.Applications.BlastpgpCommandline instead.", BiopythonDeprecationWarning)
_security_check_parameters(keywds)
att2param = {
- 'matrix' : '-M',
- 'gap_open' : '-G',
- 'gap_extend' : '-E',
- 'window_size' : '-A',
- 'npasses' : '-j',
- 'passes' : '-P',
-
- 'gapped' : '-g',
- 'expectation' : '-e',
- 'wordsize' : '-W',
- 'keep_hits' : '-K',
- 'xdrop' : '-X',
- 'hit_extend' : '-f',
- 'region_length' : '-L',
- 'db_length' : '-Z',
- 'search_length' : '-Y',
- 'nbits_gapping' : '-N',
- 'pseudocounts' : '-c',
- 'xdrop_final' : '-Z',
- 'xdrop_extension' : '-y',
- 'model_threshold' : '-h',
- 'required_start' : '-S',
- 'required_end' : '-H',
-
- 'program' : '-p',
- 'database' : '-d',
- 'infile' : '-i',
- 'filter' : '-F',
- 'believe_query' : '-J',
- 'nprocessors' : '-a',
-
- 'html' : '-T',
- 'descriptions' : '-v',
- 'alignments' : '-b',
- 'align_view' : '-m',
- 'show_gi' : '-I',
- 'seqalign_file' : '-O',
- 'align_outfile' : '-o',
- 'checkpoint_outfile' : '-C',
- 'restart_infile' : '-R',
- 'hit_infile' : '-k',
- 'matrix_outfile' : '-Q',
- 'align_infile' : '-B',
+ 'matrix': '-M',
+ 'gap_open': '-G',
+ 'gap_extend': '-E',
+ 'window_size': '-A',
+ 'npasses': '-j',
+ 'passes': '-P',
+
+ 'gapped': '-g',
+ 'expectation': '-e',
+ 'wordsize': '-W',
+ 'keep_hits': '-K',
+ 'xdrop': '-X',
+ 'hit_extend': '-f',
+ 'region_length': '-L',
+ 'db_length': '-Z',
+ 'search_length': '-Y',
+ 'nbits_gapping': '-N',
+ 'pseudocounts': '-c',
+ 'xdrop_final': '-Z',
+ 'xdrop_extension': '-y',
+ 'model_threshold': '-h',
+ 'required_start': '-S',
+ 'required_end': '-H',
+
+ 'program': '-p',
+ 'database': '-d',
+ 'infile': '-i',
+ 'filter': '-F',
+ 'believe_query': '-J',
+ 'nprocessors': '-a',
+
+ 'html': '-T',
+ 'descriptions': '-v',
+ 'alignments': '-b',
+ 'align_view': '-m',
+ 'show_gi': '-I',
+ 'seqalign_file': '-O',
+ 'align_outfile': '-o',
+ 'checkpoint_outfile': '-C',
+ 'restart_infile': '-R',
+ 'hit_infile': '-k',
+ 'matrix_outfile': '-Q',
+ 'align_infile': '-B',
}
- from Applications import BlastpgpCommandline
+ from .Applications import BlastpgpCommandline
cline = BlastpgpCommandline(blastcmd)
cline.set_parameter(att2param['database'], database)
cline.set_parameter(att2param['infile'], infile)
cline.set_parameter(att2param['align_view'], str(align_view))
- for key, value in keywds.iteritems():
+ for key, value in keywds.items():
cline.set_parameter(att2param[key], str(value))
return _invoke_blast(cline)
@@ -1992,46 +1998,44 @@
omitted standard output is used (which you can access
from the returned handles).
"""
-
- warnings.warn("This function is deprecated; you are encouraged to the command line wrapper Bio.Blast.Applications.BlastrpsCommandline instead.", BiopythonDeprecationWarning)
_security_check_parameters(keywds)
att2param = {
- 'multihit' : '-P',
- 'gapped' : '-g',
- 'expectation' : '-e',
- 'range_restriction' : '-L',
- 'xdrop' : '-X',
- 'xdrop_final' : '-Z',
- 'xdrop_extension' : '-y',
- 'search_length' : '-Y',
- 'nbits_gapping' : '-N',
- 'protein' : '-p',
- 'db_length' : '-z',
-
- 'database' : '-d',
- 'infile' : '-i',
- 'filter' : '-F',
- 'case_filter' : '-U',
- 'believe_query' : '-J',
- 'nprocessors' : '-a',
- 'logfile' : '-l',
-
- 'html' : '-T',
- 'descriptions' : '-v',
- 'alignments' : '-b',
- 'align_view' : '-m',
- 'show_gi' : '-I',
- 'seqalign_file' : '-O',
- 'align_outfile' : '-o',
+ 'multihit': '-P',
+ 'gapped': '-g',
+ 'expectation': '-e',
+ 'range_restriction': '-L',
+ 'xdrop': '-X',
+ 'xdrop_final': '-Z',
+ 'xdrop_extension': '-y',
+ 'search_length': '-Y',
+ 'nbits_gapping': '-N',
+ 'protein': '-p',
+ 'db_length': '-z',
+
+ 'database': '-d',
+ 'infile': '-i',
+ 'filter': '-F',
+ 'case_filter': '-U',
+ 'believe_query': '-J',
+ 'nprocessors': '-a',
+ 'logfile': '-l',
+
+ 'html': '-T',
+ 'descriptions': '-v',
+ 'alignments': '-b',
+ 'align_view': '-m',
+ 'show_gi': '-I',
+ 'seqalign_file': '-O',
+ 'align_outfile': '-o',
}
- from Applications import RpsBlastCommandline
+ from .Applications import RpsBlastCommandline
cline = RpsBlastCommandline(blastcmd)
cline.set_parameter(att2param['database'], database)
cline.set_parameter(att2param['infile'], infile)
cline.set_parameter(att2param['align_view'], str(align_view))
- for key, value in keywds.iteritems():
+ for key, value in keywds.items():
cline.set_parameter(att2param[key], str(value))
return _invoke_blast(cline)
@@ -2135,7 +2139,7 @@
for appending a command line), or ">", "<" or "|" (redirection)
and if any are found raises an exception.
"""
- for key, value in param_dict.iteritems():
+ for key, value in param_dict.items():
str_value = str(value) # Could easily be an int or a float
for bad_str in [";", "&&", ">", "<", "|"]:
if bad_str in str_value:
@@ -2192,7 +2196,7 @@
results = handle.read()
try:
- self._scanner.feed(StringIO.StringIO(results), self._consumer)
+ self._scanner.feed(StringIO(results), self._consumer)
except ValueError:
# if we have a bad_report_file, save the info to it first
if self._bad_report_handle:
@@ -2201,7 +2205,7 @@
# now we want to try and diagnose the error
self._diagnose_error(
- StringIO.StringIO(results), self._consumer.data)
+ StringIO(results), self._consumer.data)
# if we got here we can't figure out the problem
# so we should pass along the syntax error we got
diff -Nru python-biopython-1.62/Bio/Blast/NCBIWWW.py python-biopython-1.63/Bio/Blast/NCBIWWW.py
--- python-biopython-1.62/Bio/Blast/NCBIWWW.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Blast/NCBIWWW.py 2013-12-05 14:10:43.000000000 +0000
@@ -6,7 +6,8 @@
# Patched by Brad Chapman.
# Chris Wroe added modifications for work in myGrid
-"""
+"""Code to invoke the NCBI BLAST server over the internet.
+
This module provides code to work with the WWW version of BLAST
provided by the NCBI.
http://blast.ncbi.nlm.nih.gov/
@@ -15,28 +16,29 @@
qblast Do a BLAST search using the QBLAST API.
"""
-try:
- from cStringIO import StringIO
-except ImportError:
- from StringIO import StringIO
+from __future__ import print_function
+from Bio._py3k import StringIO
from Bio._py3k import _as_string, _as_bytes
+from Bio._py3k import urlopen as _urlopen
+from Bio._py3k import urlencode as _urlencode
+from Bio._py3k import Request as _Request
def qblast(program, database, sequence,
- auto_format=None,composition_based_statistics=None,
- db_genetic_code=None,endpoints=None,entrez_query='(none)',
- expect=10.0,filter=None,gapcosts=None,genetic_code=None,
- hitlist_size=50,i_thresh=None,layout=None,lcase_mask=None,
- matrix_name=None,nucl_penalty=None,nucl_reward=None,
- other_advanced=None,perc_ident=None,phi_pattern=None,
- query_file=None,query_believe_defline=None,query_from=None,
- query_to=None,searchsp_eff=None,service=None,threshold=None,
- ungapped_alignment=None,word_size=None,
- alignments=500,alignment_view=None,descriptions=500,
- entrez_links_new_window=None,expect_low=None,expect_high=None,
- format_entrez_query=None,format_object=None,format_type='XML',
- ncbi_gi=None,results_file=None,show_overview=None, megablast=None,
+ auto_format=None, composition_based_statistics=None,
+ db_genetic_code=None, endpoints=None, entrez_query='(none)',
+ expect=10.0, filter=None, gapcosts=None, genetic_code=None,
+ hitlist_size=50, i_thresh=None, layout=None, lcase_mask=None,
+ matrix_name=None, nucl_penalty=None, nucl_reward=None,
+ other_advanced=None, perc_ident=None, phi_pattern=None,
+ query_file=None, query_believe_defline=None, query_from=None,
+ query_to=None, searchsp_eff=None, service=None, threshold=None,
+ ungapped_alignment=None, word_size=None,
+ alignments=500, alignment_view=None, descriptions=500,
+ entrez_links_new_window=None, expect_low=None, expect_high=None,
+ format_entrez_query=None, format_object=None, format_type='XML',
+ ncbi_gi=None, results_file=None, show_overview=None, megablast=None,
):
"""Do a BLAST search using the QBLAST server at NCBI.
@@ -62,8 +64,6 @@
http://www.ncbi.nlm.nih.gov/BLAST/Doc/urlapi.html
"""
- import urllib
- import urllib2
import time
assert program in ['blastn', 'blastp', 'blastx', 'tblastn', 'tblastx']
@@ -74,76 +74,76 @@
# To perform a PSI-BLAST or PHI-BLAST search the service ("Put" and "Get" commands) must be specified
# (e.g. psi_blast = NCBIWWW.qblast("blastp", "refseq_protein", input_sequence, service="psi"))
parameters = [
- ('AUTO_FORMAT',auto_format),
- ('COMPOSITION_BASED_STATISTICS',composition_based_statistics),
- ('DATABASE',database),
- ('DB_GENETIC_CODE',db_genetic_code),
- ('ENDPOINTS',endpoints),
- ('ENTREZ_QUERY',entrez_query),
- ('EXPECT',expect),
- ('FILTER',filter),
- ('GAPCOSTS',gapcosts),
- ('GENETIC_CODE',genetic_code),
- ('HITLIST_SIZE',hitlist_size),
- ('I_THRESH',i_thresh),
- ('LAYOUT',layout),
- ('LCASE_MASK',lcase_mask),
- ('MEGABLAST',megablast),
- ('MATRIX_NAME',matrix_name),
- ('NUCL_PENALTY',nucl_penalty),
- ('NUCL_REWARD',nucl_reward),
- ('OTHER_ADVANCED',other_advanced),
- ('PERC_IDENT',perc_ident),
- ('PHI_PATTERN',phi_pattern),
- ('PROGRAM',program),
+ ('AUTO_FORMAT', auto_format),
+ ('COMPOSITION_BASED_STATISTICS', composition_based_statistics),
+ ('DATABASE', database),
+ ('DB_GENETIC_CODE', db_genetic_code),
+ ('ENDPOINTS', endpoints),
+ ('ENTREZ_QUERY', entrez_query),
+ ('EXPECT', expect),
+ ('FILTER', filter),
+ ('GAPCOSTS', gapcosts),
+ ('GENETIC_CODE', genetic_code),
+ ('HITLIST_SIZE', hitlist_size),
+ ('I_THRESH', i_thresh),
+ ('LAYOUT', layout),
+ ('LCASE_MASK', lcase_mask),
+ ('MEGABLAST', megablast),
+ ('MATRIX_NAME', matrix_name),
+ ('NUCL_PENALTY', nucl_penalty),
+ ('NUCL_REWARD', nucl_reward),
+ ('OTHER_ADVANCED', other_advanced),
+ ('PERC_IDENT', perc_ident),
+ ('PHI_PATTERN', phi_pattern),
+ ('PROGRAM', program),
#('PSSM',pssm), - It is possible to use PSI-BLAST via this API?
- ('QUERY',sequence),
- ('QUERY_FILE',query_file),
- ('QUERY_BELIEVE_DEFLINE',query_believe_defline),
- ('QUERY_FROM',query_from),
- ('QUERY_TO',query_to),
+ ('QUERY', sequence),
+ ('QUERY_FILE', query_file),
+ ('QUERY_BELIEVE_DEFLINE', query_believe_defline),
+ ('QUERY_FROM', query_from),
+ ('QUERY_TO', query_to),
#('RESULTS_FILE',...), - Can we use this parameter?
- ('SEARCHSP_EFF',searchsp_eff),
- ('SERVICE',service),
- ('THRESHOLD',threshold),
- ('UNGAPPED_ALIGNMENT',ungapped_alignment),
- ('WORD_SIZE',word_size),
+ ('SEARCHSP_EFF', searchsp_eff),
+ ('SERVICE', service),
+ ('THRESHOLD', threshold),
+ ('UNGAPPED_ALIGNMENT', ungapped_alignment),
+ ('WORD_SIZE', word_size),
('CMD', 'Put'),
]
query = [x for x in parameters if x[1] is not None]
- message = _as_bytes(urllib.urlencode(query))
+ message = _as_bytes(_urlencode(query))
# Send off the initial query to qblast.
# Note the NCBI do not currently impose a rate limit here, other
# than the request not to make say 50 queries at once using multiple
# threads.
- request = urllib2.Request("http://blast.ncbi.nlm.nih.gov/Blast.cgi",
- message,
- {"User-Agent":"BiopythonClient"})
- handle = urllib2.urlopen(request)
+ request = _Request("http://blast.ncbi.nlm.nih.gov/Blast.cgi",
+ message,
+ {"User-Agent":"BiopythonClient"})
+ handle = _urlopen(request)
# Format the "Get" command, which gets the formatted results from qblast
# Parameters taken from http://www.ncbi.nlm.nih.gov/BLAST/Doc/node6.html on 9 July 2007
rid, rtoe = _parse_qblast_ref_page(handle)
parameters = [
- ('ALIGNMENTS',alignments),
- ('ALIGNMENT_VIEW',alignment_view),
- ('DESCRIPTIONS',descriptions),
- ('ENTREZ_LINKS_NEW_WINDOW',entrez_links_new_window),
- ('EXPECT_LOW',expect_low),
- ('EXPECT_HIGH',expect_high),
- ('FORMAT_ENTREZ_QUERY',format_entrez_query),
- ('FORMAT_OBJECT',format_object),
- ('FORMAT_TYPE',format_type),
- ('NCBI_GI',ncbi_gi),
- ('RID',rid),
- ('RESULTS_FILE',results_file),
- ('SERVICE',service),
- ('SHOW_OVERVIEW',show_overview),
+ ('ALIGNMENTS', alignments),
+ ('ALIGNMENT_VIEW', alignment_view),
+ ('DESCRIPTIONS', descriptions),
+ ('ENTREZ_LINKS_NEW_WINDOW', entrez_links_new_window),
+ ('EXPECT_LOW', expect_low),
+ ('EXPECT_HIGH', expect_high),
+ ('FORMAT_ENTREZ_QUERY', format_entrez_query),
+ ('FORMAT_OBJECT', format_object),
+ ('FORMAT_TYPE', format_type),
+ ('NCBI_GI', ncbi_gi),
+ ('RID', rid),
+ ('RESULTS_FILE', results_file),
+ ('SERVICE', service),
+ ('SHOW_OVERVIEW', show_overview),
('CMD', 'Get'),
]
query = [x for x in parameters if x[1] is not None]
- message = _as_bytes(urllib.urlencode(query))
+ message = _as_bytes(_urlencode(query))
# Poll NCBI until the results are ready. Use a 3 second wait
delay = 3.0
@@ -157,10 +157,10 @@
else:
previous = current
- request = urllib2.Request("http://blast.ncbi.nlm.nih.gov/Blast.cgi",
- message,
- {"User-Agent":"BiopythonClient"})
- handle = urllib2.urlopen(request)
+ request = _Request("http://blast.ncbi.nlm.nih.gov/Blast.cgi",
+ message,
+ {"User-Agent":"BiopythonClient"})
+ handle = _urlopen(request)
results = _as_string(handle.read())
# Can see an "\n\n" page while results are in progress,
@@ -211,21 +211,21 @@
i = s.find('<div class="error msInf">')
if i != -1:
msg = s[i+len('<div class="error msInf">'):].strip()
- msg = msg.split("<",1)[0].split("\n",1)[0].strip()
+ msg = msg.split("<", 1)[0].split("\n", 1)[0].strip()
if msg:
raise ValueError("Error message from NCBI: %s" % msg)
#In spring 2010 the markup was like this:
i = s.find('<p class="error">')
if i != -1:
msg = s[i+len('<p class="error">'):].strip()
- msg = msg.split("<",1)[0].split("\n",1)[0].strip()
+ msg = msg.split("<", 1)[0].split("\n", 1)[0].strip()
if msg:
raise ValueError("Error message from NCBI: %s" % msg)
#Generic search based on the way the error messages start:
i = s.find('Message ID#')
if i != -1:
#Break the message at the first HTML tag
- msg = s[i:].split("<",1)[0].split("\n",1)[0].strip()
+ msg = s[i:].split("<", 1)[0].split("\n", 1)[0].strip()
raise ValueError("Error message from NCBI: %s" % msg)
#We didn't recognise the error layout :(
#print s
diff -Nru python-biopython-1.62/Bio/Blast/NCBIXML.py python-biopython-1.63/Bio/Blast/NCBIXML.py
--- python-biopython-1.62/Bio/Blast/NCBIXML.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Blast/NCBIXML.py 2013-12-05 14:10:43.000000000 +0000
@@ -21,9 +21,12 @@
Blast records. It uses the BlastParser internally.
read Returns a single Blast record. Uses the BlastParser internally.
"""
+from __future__ import print_function
+
from Bio.Blast import Record
import xml.sax
from xml.sax.handler import ContentHandler
+from functools import reduce
class _XMLparser(ContentHandler):
@@ -70,11 +73,11 @@
if hasattr(self, method):
eval("self.%s()" % method)
if self._debug > 4:
- print "NCBIXML: Parsed: " + method
+ print("NCBIXML: Parsed: " + method)
elif self._debug > 3:
# Doesn't exist (yet) and may want to warn about it
if method not in self._debug_ignore_list:
- print "NCBIXML: Ignored: " + method
+ print("NCBIXML: Ignored: " + method)
self._debug_ignore_list.append(method)
#We don't care about white space in parent tags like Hsp,
@@ -105,11 +108,11 @@
if hasattr(self, method):
eval("self.%s()" % method)
if self._debug > 2:
- print "NCBIXML: Parsed: " + method, self._value
+ print("NCBIXML: Parsed: %s %s" % (method, self._value))
elif self._debug > 1:
# Doesn't exist (yet) and may want to warn about it
if method not in self._debug_ignore_list:
- print "NCBIXML: Ignored: " + method, self._value
+ print("NCBIXML: Ignored: %s %s" % (method, self._value))
self._debug_ignore_list.append(method)
# Reset character buffer
@@ -209,7 +212,7 @@
self._blast = None
if self._debug:
- print "NCBIXML: Added Blast record to results"
+ print("NCBIXML: Added Blast record to results")
# Header
def _end_BlastOutput_program(self):
@@ -575,13 +578,13 @@
"""
iterator = parse(handle, debug)
try:
- first = iterator.next()
+ first = next(iterator)
except StopIteration:
first = None
if first is None:
raise ValueError("No records found in handle")
try:
- second = iterator.next()
+ second = next(iterator)
except StopIteration:
second = None
if second is not None:
@@ -659,7 +662,7 @@
# one XML file for each query!
# Finish the old file:
- text, pending = (text+pending).split("\n" + XML_START,1)
+ text, pending = (text+pending).split("\n" + XML_START, 1)
pending = XML_START + pending
expat_parser.Parse(text, True) # End of XML record
@@ -689,28 +692,28 @@
if __name__ == '__main__':
import sys
- handle = open(sys.argv[1])
- r_list = parse(handle)
+ with open(sys.argv[1]) as handle:
+ r_list = parse(handle)
for r in r_list:
# Small test
- print 'Blast of', r.query
- print 'Found %s alignments with a total of %s HSPs' % (len(r.alignments),
- reduce(lambda a,b: a+b,
- [len(a.hsps) for a in r.alignments]))
+ print('Blast of %s' % r.query)
+ print('Found %s alignments with a total of %s HSPs' % (len(r.alignments),
+ reduce(lambda a, b: a+b,
+ [len(a.hsps) for a in r.alignments])))
for al in r.alignments:
- print al.title[:50], al.length, 'bp', len(al.hsps), 'HSPs'
+ print("%s %i bp %i HSPs" % (al.title[:50], al.length, len(al.hsps)))
# Cookbook example
E_VALUE_THRESH = 0.04
for alignment in r.alignments:
for hsp in alignment.hsps:
if hsp.expect < E_VALUE_THRESH:
- print '*****'
- print 'sequence', alignment.title
- print 'length', alignment.length
- print 'e value', hsp.expect
- print hsp.query[:75] + '...'
- print hsp.match[:75] + '...'
- print hsp.sbjct[:75] + '...'
+ print('*****')
+ print('sequence %s' % alignment.title)
+ print('length %i' % alignment.length)
+ print('e value %f' % hsp.expect)
+ print(hsp.query[:75] + '...')
+ print(hsp.match[:75] + '...')
+ print(hsp.sbjct[:75] + '...')
diff -Nru python-biopython-1.62/Bio/Blast/ParseBlastTable.py python-biopython-1.63/Bio/Blast/ParseBlastTable.py
--- python-biopython-1.62/Bio/Blast/ParseBlastTable.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Blast/ParseBlastTable.py 2013-12-05 14:10:43.000000000 +0000
@@ -8,9 +8,10 @@
Returns a BlastTableRec instance
"""
+import sys
class BlastTableEntry(object):
- def __init__(self,in_rec):
+ def __init__(self, in_rec):
bt_fields = in_rec.split()
self.qid = bt_fields[0].split('|')
self.sid = bt_fields[1].split('|')
@@ -55,7 +56,7 @@
self._n = 0
self._in_header = 1
- def next(self):
+ def __next__(self):
self.table_record = BlastTableRec()
self._n += 1
inline = self._lookahead
@@ -76,6 +77,16 @@
self._in_header = 1
return self.table_record
+ if sys.version_info[0] < 3:
+ def next(self):
+ """Deprecated Python 2 style alias for Python 3 style __next__ method."""
+ import warnings
+ from Bio import BiopythonDeprecationWarning
+ warnings.warn("Please use next(my_iterator) instead of my_iterator.next(), "
+ "the .next() method is deprecated and will be removed in a "
+ "future release of Biopython.", BiopythonDeprecationWarning)
+ return self.__next__()
+
def _consume_entry(self, inline):
current_entry = BlastTableEntry(inline)
self.table_record.add_entry(current_entry)
@@ -83,7 +94,7 @@
def _consume_header(self, inline):
for keyword in reader_keywords:
if keyword in inline:
- in_header = self._Parse('_parse_%s' % reader_keywords[keyword],inline)
+ in_header = self._Parse('_parse_%s' % reader_keywords[keyword], inline)
break
return in_header
@@ -110,4 +121,4 @@
return 0
def _Parse(self, method_name, inline):
- return getattr(self,method_name)(inline)
+ return getattr(self, method_name)(inline)
diff -Nru python-biopython-1.62/Bio/Blast/Record.py python-biopython-1.63/Bio/Blast/Record.py
--- python-biopython-1.62/Bio/Blast/Record.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Blast/Record.py 2013-12-05 14:10:43.000000000 +0000
@@ -238,7 +238,7 @@
n += 1
generic = Generic.Alignment(alphabet)
- for (name,seq) in zip(seq_names,seq_parts):
+ for (name, seq) in zip(seq_names, seq_parts):
generic.add_sequence(name, seq)
return generic
diff -Nru python-biopython-1.62/Bio/Cluster/__init__.py python-biopython-1.63/Bio/Cluster/__init__.py
--- python-biopython-1.62/Bio/Cluster/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Cluster/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
import numpy
from Bio.Cluster.cluster import *
@@ -66,44 +71,43 @@
extension = ".atr"
keyword = "ARRY"
nnodes = len(tree)
- outputfile = open(jobname+extension, "w")
- nodeindex = 0
- nodeID = [''] * nnodes
- nodecounts = numpy.zeros(nnodes, int)
- nodeorder = numpy.zeros(nnodes)
- nodedist = numpy.array([node.distance for node in tree])
- for nodeindex in range(nnodes):
- min1 = tree[nodeindex].left
- min2 = tree[nodeindex].right
- nodeID[nodeindex] = "NODE%dX" % (nodeindex+1)
- outputfile.write(nodeID[nodeindex])
- outputfile.write("\t")
- if min1 < 0:
- index1 = -min1-1
- order1 = nodeorder[index1]
- counts1 = nodecounts[index1]
- outputfile.write(nodeID[index1]+"\t")
- nodedist[nodeindex] = max(nodedist[nodeindex], nodedist[index1])
- else:
- order1 = order[min1]
- counts1 = 1
- outputfile.write("%s%dX\t" % (keyword, min1))
- if min2 < 0:
- index2 = -min2-1
- order2 = nodeorder[index2]
- counts2 = nodecounts[index2]
- outputfile.write(nodeID[index2]+"\t")
- nodedist[nodeindex] = max(nodedist[nodeindex], nodedist[index2])
- else:
- order2 = order[min2]
- counts2 = 1
- outputfile.write("%s%dX\t" % (keyword, min2))
- outputfile.write(str(1.0-nodedist[nodeindex]))
- outputfile.write("\n")
- counts = counts1 + counts2
- nodecounts[nodeindex] = counts
- nodeorder[nodeindex] = (counts1*order1+counts2*order2) / counts
- outputfile.close()
+ with open(jobname+extension, "w") as outputfile:
+ nodeindex = 0
+ nodeID = [''] * nnodes
+ nodecounts = numpy.zeros(nnodes, int)
+ nodeorder = numpy.zeros(nnodes)
+ nodedist = numpy.array([node.distance for node in tree])
+ for nodeindex in range(nnodes):
+ min1 = tree[nodeindex].left
+ min2 = tree[nodeindex].right
+ nodeID[nodeindex] = "NODE%dX" % (nodeindex+1)
+ outputfile.write(nodeID[nodeindex])
+ outputfile.write("\t")
+ if min1 < 0:
+ index1 = -min1-1
+ order1 = nodeorder[index1]
+ counts1 = nodecounts[index1]
+ outputfile.write(nodeID[index1]+"\t")
+ nodedist[nodeindex] = max(nodedist[nodeindex], nodedist[index1])
+ else:
+ order1 = order[min1]
+ counts1 = 1
+ outputfile.write("%s%dX\t" % (keyword, min1))
+ if min2 < 0:
+ index2 = -min2-1
+ order2 = nodeorder[index2]
+ counts2 = nodecounts[index2]
+ outputfile.write(nodeID[index2]+"\t")
+ nodedist[nodeindex] = max(nodedist[nodeindex], nodedist[index2])
+ else:
+ order2 = order[min2]
+ counts2 = 1
+ outputfile.write("%s%dX\t" % (keyword, min2))
+ outputfile.write(str(1.0-nodedist[nodeindex]))
+ outputfile.write("\n")
+ counts = counts1 + counts2
+ nodecounts[nodeindex] = counts
+ nodeorder[nodeindex] = (counts1*order1+counts2*order2) / counts
# Now set up order based on the tree structure
index = _treesort(order, nodeorder, nodecounts, tree)
return index
@@ -511,7 +515,7 @@
aid = 0
filename = jobname
postfix = ""
- if type(geneclusters) == Tree:
+ if isinstance(geneclusters, Tree):
# This is a hierarchical clustering result.
geneindex = _savetree(jobname, geneclusters, gorder, 0)
gid = 1
@@ -524,7 +528,7 @@
postfix = "_G%d" % k
else:
geneindex = numpy.argsort(gorder)
- if type(expclusters) == Tree:
+ if isinstance(expclusters, Tree):
# This is a hierarchical clustering result.
expindex = _savetree(jobname, expclusters, eorder, 1)
aid = 1
@@ -548,24 +552,20 @@
else:
label = "ARRAY"
names = self.expid
- try:
- outputfile = open(filename, "w")
- except IOError:
- raise IOError("Unable to open output file")
- outputfile.write(label + "\tGROUP\n")
- index = numpy.argsort(order)
- n = len(names)
- sortedindex = numpy.zeros(n, int)
- counter = 0
- cluster = 0
- while counter < n:
- for j in index:
- if clusterids[j] == cluster:
- outputfile.write("%s\t%s\n" % (names[j], cluster))
- sortedindex[counter] = j
- counter += 1
- cluster += 1
- outputfile.close()
+ with open(filename, "w") as outputfile:
+ outputfile.write(label + "\tGROUP\n")
+ index = numpy.argsort(order)
+ n = len(names)
+ sortedindex = numpy.zeros(n, int)
+ counter = 0
+ cluster = 0
+ while counter < n:
+ for j in index:
+ if clusterids[j] == cluster:
+ outputfile.write("%s\t%s\n" % (names[j], cluster))
+ sortedindex[counter] = j
+ counter += 1
+ cluster += 1
return sortedindex
def _savedata(self, jobname, gid, aid, geneindex, expindex):
@@ -575,56 +575,52 @@
else:
genename = self.genename
(ngenes, nexps) = numpy.shape(self.data)
- try:
- outputfile = open(jobname+'.cdt', 'w')
- except IOError:
- raise IOError("Unable to open output file")
- if self.mask is not None:
- mask = self.mask
- else:
- mask = numpy.ones((ngenes, nexps), int)
- if self.gweight is not None:
- gweight = self.gweight
- else:
- gweight = numpy.ones(ngenes)
- if self.eweight is not None:
- eweight = self.eweight
- else:
- eweight = numpy.ones(nexps)
- if gid:
- outputfile.write('GID\t')
- outputfile.write(self.uniqid)
- outputfile.write('\tNAME\tGWEIGHT')
- # Now add headers for data columns.
- for j in expindex:
- outputfile.write('\t%s' % self.expid[j])
- outputfile.write('\n')
- if aid:
- outputfile.write("AID")
+ with open(jobname+'.cdt', 'w') as outputfile:
+ if self.mask is not None:
+ mask = self.mask
+ else:
+ mask = numpy.ones((ngenes, nexps), int)
+ if self.gweight is not None:
+ gweight = self.gweight
+ else:
+ gweight = numpy.ones(ngenes)
+ if self.eweight is not None:
+ eweight = self.eweight
+ else:
+ eweight = numpy.ones(nexps)
if gid:
- outputfile.write('\t')
- outputfile.write("\t\t")
+ outputfile.write('GID\t')
+ outputfile.write(self.uniqid)
+ outputfile.write('\tNAME\tGWEIGHT')
+ # Now add headers for data columns.
for j in expindex:
- outputfile.write('\tARRY%dX' % j)
+ outputfile.write('\t%s' % self.expid[j])
outputfile.write('\n')
- outputfile.write('EWEIGHT')
- if gid:
- outputfile.write('\t')
- outputfile.write('\t\t')
- for j in expindex:
- outputfile.write('\t%f' % eweight[j])
- outputfile.write('\n')
- for i in geneindex:
+ if aid:
+ outputfile.write("AID")
+ if gid:
+ outputfile.write('\t')
+ outputfile.write("\t\t")
+ for j in expindex:
+ outputfile.write('\tARRY%dX' % j)
+ outputfile.write('\n')
+ outputfile.write('EWEIGHT')
if gid:
- outputfile.write('GENE%dX\t' % i)
- outputfile.write("%s\t%s\t%f" %
- (self.geneid[i], genename[i], gweight[i]))
- for j in expindex:
outputfile.write('\t')
- if mask[i, j]:
- outputfile.write(str(self.data[i, j]))
+ outputfile.write('\t\t')
+ for j in expindex:
+ outputfile.write('\t%f' % eweight[j])
outputfile.write('\n')
- outputfile.close()
+ for i in geneindex:
+ if gid:
+ outputfile.write('GENE%dX\t' % i)
+ outputfile.write("%s\t%s\t%f" %
+ (self.geneid[i], genename[i], gweight[i]))
+ for j in expindex:
+ outputfile.write('\t')
+ if mask[i, j]:
+ outputfile.write(str(self.data[i, j]))
+ outputfile.write('\n')
def read(handle):
diff -Nru python-biopython-1.62/Bio/Compass/__init__.py python-biopython-1.63/Bio/Compass/__init__.py
--- python-biopython-1.62/Bio/Compass/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Compass/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -27,16 +27,16 @@
def read(handle):
record = None
try:
- line = handle.next()
+ line = next(handle)
record = Record()
__read_names(record, line)
- line = handle.next()
+ line = next(handle)
__read_threshold(record, line)
- line = handle.next()
+ line = next(handle)
__read_lengths(record, line)
- line = handle.next()
+ line = next(handle)
__read_profilewidth(record, line)
- line = handle.next()
+ line = next(handle)
__read_scores(record, line)
except StopIteration:
if not record:
@@ -48,9 +48,9 @@
continue
__read_query_alignment(record, line)
try:
- line = handle.next()
+ line = next(handle)
__read_positive_alignment(record, line)
- line = handle.next()
+ line = next(handle)
__read_hit_alignment(record, line)
except StopIteration:
raise ValueError("Unexpected end of stream.")
@@ -60,20 +60,20 @@
def parse(handle):
record = None
try:
- line = handle.next()
+ line = next(handle)
except StopIteration:
return
while True:
try:
record = Record()
__read_names(record, line)
- line = handle.next()
+ line = next(handle)
__read_threshold(record, line)
- line = handle.next()
+ line = next(handle)
__read_lengths(record, line)
- line = handle.next()
+ line = next(handle)
__read_profilewidth(record, line)
- line = handle.next()
+ line = next(handle)
__read_scores(record, line)
except StopIteration:
raise ValueError("Unexpected end of stream.")
@@ -85,9 +85,9 @@
break
__read_query_alignment(record, line)
try:
- line = handle.next()
+ line = next(handle)
__read_positive_alignment(record, line)
- line = handle.next()
+ line = next(handle)
__read_hit_alignment(record, line)
except StopIteration:
raise ValueError("Unexpected end of stream.")
diff -Nru python-biopython-1.62/Bio/Crystal/__init__.py python-biopython-1.63/Bio/Crystal/__init__.py
--- python-biopython-1.62/Bio/Crystal/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Crystal/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -12,6 +12,10 @@
"""
import copy
+from functools import reduce
+
+from Bio._py3k import map
+from Bio._py3k import basestring
class CrystalError(Exception):
@@ -78,7 +82,7 @@
residues = residues.replace('*', ' ')
residues = residues.strip()
elements = residues.split()
- self.data = map(Hetero, elements)
+ self.data = [Hetero(x) for x in elements]
elif isinstance(residues, list):
for element in residues:
if not isinstance(element, Hetero):
@@ -239,18 +243,14 @@
def __repr__(self):
output = ''
- keys = self.data.keys()
- keys.sort()
- for key in keys:
- output = output + '%s : %s\n' % (key, self.data[ key ])
+ for key in sorted(self.data):
+ output += '%s : %s\n' % (key, self.data[key])
return output
def __str__(self):
output = ''
- keys = self.data.keys()
- keys.sort()
- for key in keys:
- output = output + '%s : %s\n' % (key, self.data[ key ])
+ for key in sorted(self.data):
+ output += '%s : %s\n' % (key, self.data[key])
return output
def tostring(self):
@@ -266,7 +266,7 @@
if isinstance(item, Chain):
self.data[key] = item
elif isinstance(item, str):
- self.data[ key ] = Chain(item)
+ self.data[key] = Chain(item)
else:
raise TypeError
diff -Nru python-biopython-1.62/Bio/Data/CodonTable.py python-biopython-1.63/Bio/Data/CodonTable.py
--- python-biopython-1.62/Bio/Data/CodonTable.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Data/CodonTable.py 2013-12-05 14:10:43.000000000 +0000
@@ -9,6 +9,8 @@
Last updated for Version 3.9
"""
+from __future__ import print_function
+
from Bio import Alphabet
from Bio.Alphabet import IUPAC
from Bio.Data import IUPACData
@@ -66,8 +68,8 @@
e.g.
>>> import Bio.Data.CodonTable
- >>> print Bio.Data.CodonTable.standard_dna_table
- >>> print Bio.Data.CodonTable.generic_by_id[1]
+ >>> print(Bio.Data.CodonTable.standard_dna_table)
+ >>> print(Bio.Data.CodonTable.generic_by_id[1])
"""
if self.id:
@@ -75,7 +77,7 @@
else:
answer = "Table ID unknown"
if self.names:
- answer += " " + ", ".join(filter(None, self.names))
+ answer += " " + ", ".join([x for x in self.names if x])
#Use the main four letters (and the conventional ordering)
#even for ambiguous tables
@@ -89,19 +91,16 @@
letters = "UCAG"
#Build the table...
- answer=answer + "\n\n |" + "|".join(
- [" %s " % c2 for c2 in letters]
- ) + "|"
- answer=answer + "\n--+" \
- + "+".join(["---------" for c2 in letters]) + "+--"
+ answer += "\n\n |" + "|".join(" %s " % c2 for c2 in letters) + "|"
+ answer += "\n--+" + "+".join("---------" for c2 in letters) + "+--"
for c1 in letters:
for c3 in letters:
line = c1 + " |"
for c2 in letters:
codon = c1+c2+c3
- line = line + " %s" % codon
+ line += " %s" % codon
if codon in self.stop_codons:
- line = line + " Stop|"
+ line += " Stop|"
else:
try:
amino = self.forward_table[codon]
@@ -110,13 +109,12 @@
except TranslationError:
amino = "?"
if codon in self.start_codons:
- line = line + " %s(s)|" % amino
+ line += " %s(s)|" % amino
else:
- line = line + " %s |" % amino
- line = line + " " + c3
- answer = answer + "\n"+ line
- answer=answer + "\n--+" \
- + "+".join(["---------" for c2 in letters]) + "+--"
+ line += " %s |" % amino
+ line += " " + c3
+ answer += "\n"+ line
+ answer += "\n--+" + "+".join("---------" for c2 in letters) + "+--"
return answer
@@ -204,7 +202,7 @@
+ "for both proteins and stop codons")
# This is a true stop codon - tell the caller about it
raise KeyError(codon)
- return possible.keys()
+ return list(possible.keys())
def list_ambiguous_codons(codons, ambiguous_nucleotide_values):
@@ -225,14 +223,14 @@
#This will generate things like 'TRR' from ['TAG', 'TGA'], which
#we don't want to include:
c1_list = sorted(letter for (letter, meanings)
- in ambiguous_nucleotide_values.iteritems()
- if set([codon[0] for codon in codons]).issuperset(set(meanings)))
+ in ambiguous_nucleotide_values.items()
+ if set(codon[0] for codon in codons).issuperset(set(meanings)))
c2_list = sorted(letter for (letter, meanings)
- in ambiguous_nucleotide_values.iteritems()
- if set([codon[1] for codon in codons]).issuperset(set(meanings)))
+ in ambiguous_nucleotide_values.items()
+ if set(codon[1] for codon in codons).issuperset(set(meanings)))
c3_list = sorted(letter for (letter, meanings)
- in ambiguous_nucleotide_values.iteritems()
- if set([codon[2] for codon in codons]).issuperset(set(meanings)))
+ in ambiguous_nucleotide_values.items()
+ if set(codon[2] for codon in codons).issuperset(set(meanings)))
#candidates is a list (not a set) to preserve the iteration order
candidates = []
for c1 in c1_list:
@@ -290,13 +288,13 @@
self.ambiguous_protein = ambiguous_protein
inverted = {}
- for name, val in ambiguous_protein.iteritems():
+ for name, val in ambiguous_protein.items():
for c in val:
x = inverted.get(c, {})
x[name] = 1
inverted[c] = x
- for name, val in inverted.iteritems():
- inverted[name] = val.keys()
+ for name, val in inverted.items():
+ inverted[name] = list(val.keys())
self._inverted = inverted
self._cache = {}
@@ -353,7 +351,7 @@
n = len(possible)
possible = []
- for amino, val in ambiguous_possible.iteritems():
+ for amino, val in ambiguous_possible.items():
if val == n:
possible.append(amino)
@@ -384,7 +382,7 @@
"""Turns codon table data into objects, and stores them in the dictionaries (PRIVATE)."""
#In most cases names are divided by "; ", however there is also
#'Bacterial and Plant Plastid' (which used to be just 'Bacterial')
- names = [x.strip() for x in name.replace(" and ","; ").split("; ")]
+ names = [x.strip() for x in name.replace(" and ", "; ").split("; ")]
dna = NCBICodonTableDNA(id, names + [alt_name], table, start_codons,
stop_codons)
@@ -398,7 +396,7 @@
# replace all T's with U's for the RNA tables
rna_table = {}
generic_table = {}
- for codon, val in table.iteritems():
+ for codon, val in table.items():
generic_table[codon] = val
codon = codon.replace("T", "U")
generic_table[codon] = val
@@ -422,7 +420,7 @@
generic_start_codons, generic_stop_codons)
#The following isn't very elegant, but seems to work nicely.
- _merged_values = dict(IUPACData.ambiguous_rna_values.iteritems())
+ _merged_values = dict(IUPACData.ambiguous_rna_values.items())
_merged_values["T"] = "U"
ambig_generic = AmbiguousCodonTable(generic,
Alphabet.NucleotideAlphabet(),
@@ -863,9 +861,9 @@
)
#Basic sanity test,
-for key, val in generic_by_name.iteritems():
+for key, val in generic_by_name.items():
assert key in ambiguous_generic_by_name[key].names
-for key, val in generic_by_id.iteritems():
+for key, val in generic_by_id.items():
assert ambiguous_generic_by_id[key].id == key
del key, val
@@ -879,7 +877,7 @@
if "UAA" in unambiguous_rna_by_id[n].stop_codons \
and "UGA" in unambiguous_rna_by_id[n].stop_codons:
try:
- print ambiguous_dna_by_id[n].forward_table["TRA"]
+ print(ambiguous_dna_by_id[n].forward_table["TRA"])
assert False, "Should be a stop only"
except KeyError:
pass
diff -Nru python-biopython-1.62/Bio/Data/IUPACData.py python-biopython-1.63/Bio/Data/IUPACData.py
--- python-biopython-1.62/Bio/Data/IUPACData.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Data/IUPACData.py 2013-12-05 14:10:43.000000000 +0000
@@ -32,15 +32,15 @@
'S': 'Ser', 'T': 'Thr', 'V': 'Val', 'W': 'Trp',
'Y': 'Tyr',
}
-protein_letters_1to3_extended = dict(protein_letters_1to3.items() + {
+protein_letters_1to3_extended = dict(list(protein_letters_1to3.items()) + list({
'B': 'Asx', 'X': 'Xaa', 'Z': 'Glx', 'J': 'Xle',
'U': 'Sel', 'O': 'Pyl',
-}.items())
+}.items()))
-protein_letters_3to1 = dict([(x[1], x[0]) for x in
- protein_letters_1to3.items()])
-protein_letters_3to1_extended = dict([(x[1], x[0]) for x in
- protein_letters_1to3_extended.items()])
+protein_letters_3to1 = dict((x[1], x[0]) for x in
+ protein_letters_1to3.items())
+protein_letters_3to1_extended = dict((x[1], x[0]) for x in
+ protein_letters_1to3_extended.items())
ambiguous_dna_letters = "GATCRYWSMKHBVDN"
unambiguous_dna_letters = "GATC"
@@ -134,7 +134,7 @@
def _make_ranges(mydict):
d = {}
- for key, value in mydict.iteritems():
+ for key, value in mydict.items():
d[key] = (value, value)
return d
@@ -159,12 +159,12 @@
def _make_ambiguous_ranges(mydict, weight_table):
range_d = {}
avg_d = {}
- for letter, values in mydict.iteritems():
+ for letter, values in mydict.items():
#Following line is a quick hack to skip undefined weights for U and O
if len(values) == 1 and values[0] not in weight_table:
continue
- weights = map(weight_table.get, values)
+ weights = [weight_table.get(x) for x in values]
range_d[letter] = (min(weights), max(weights))
total_w = 0.0
for w in weights:
@@ -269,113 +269,113 @@
# For Center of Mass Calculation.
# Taken from http://www.chem.qmul.ac.uk/iupac/AtWt/ & PyMol
atom_weights = {
- 'H' : 1.00794,
- 'He' : 4.002602,
- 'Li' : 6.941,
- 'Be' : 9.012182,
- 'B' : 10.811,
- 'C' : 12.0107,
- 'N' : 14.0067,
- 'O' : 15.9994,
- 'F' : 18.9984032,
- 'Ne' : 20.1797,
- 'Na' : 22.989770,
- 'Mg' : 24.3050,
- 'Al' : 26.981538,
- 'Si' : 28.0855,
- 'P' : 30.973761,
- 'S' : 32.065,
- 'Cl' : 35.453,
- 'Ar' : 39.948,
- 'K' : 39.0983,
- 'Ca' : 40.078,
- 'Sc' : 44.955910,
- 'Ti' : 47.867,
- 'V' : 50.9415,
- 'Cr' : 51.9961,
- 'Mn' : 54.938049,
- 'Fe' : 55.845,
- 'Co' : 58.933200,
- 'Ni' : 58.6934,
- 'Cu' : 63.546,
- 'Zn' : 65.39,
- 'Ga' : 69.723,
- 'Ge' : 72.64,
- 'As' : 74.92160,
- 'Se' : 78.96,
- 'Br' : 79.904,
- 'Kr' : 83.80,
- 'Rb' : 85.4678,
- 'Sr' : 87.62,
- 'Y' : 88.90585,
- 'Zr' : 91.224,
- 'Nb' : 92.90638,
- 'Mo' : 95.94,
- 'Tc' : 98.0,
- 'Ru' : 101.07,
- 'Rh' : 102.90550,
- 'Pd' : 106.42,
- 'Ag' : 107.8682,
- 'Cd' : 112.411,
- 'In' : 114.818,
- 'Sn' : 118.710,
- 'Sb' : 121.760,
- 'Te' : 127.60,
- 'I' : 126.90447,
- 'Xe' : 131.293,
- 'Cs' : 132.90545,
- 'Ba' : 137.327,
- 'La' : 138.9055,
- 'Ce' : 140.116,
- 'Pr' : 140.90765,
- 'Nd' : 144.24,
- 'Pm' : 145.0,
- 'Sm' : 150.36,
- 'Eu' : 151.964,
- 'Gd' : 157.25,
- 'Tb' : 158.92534,
- 'Dy' : 162.50,
- 'Ho' : 164.93032,
- 'Er' : 167.259,
- 'Tm' : 168.93421,
- 'Yb' : 173.04,
- 'Lu' : 174.967,
- 'Hf' : 178.49,
- 'Ta' : 180.9479,
- 'W' : 183.84,
- 'Re' : 186.207,
- 'Os' : 190.23,
- 'Ir' : 192.217,
- 'Pt' : 195.078,
- 'Au' : 196.96655,
- 'Hg' : 200.59,
- 'Tl' : 204.3833,
- 'Pb' : 207.2,
- 'Bi' : 208.98038,
- 'Po' : 208.98,
- 'At' : 209.99,
- 'Rn' : 222.02,
- 'Fr' : 223.02,
- 'Ra' : 226.03,
- 'Ac' : 227.03,
- 'Th' : 232.0381,
- 'Pa' : 231.03588,
- 'U' : 238.02891,
- 'Np' : 237.05,
- 'Pu' : 244.06,
- 'Am' : 243.06,
- 'Cm' : 247.07,
- 'Bk' : 247.07,
- 'Cf' : 251.08,
- 'Es' : 252.08,
- 'Fm' : 257.10,
- 'Md' : 258.10,
- 'No' : 259.10,
- 'Lr' : 262.11,
- 'Rf' : 261.11,
- 'Db' : 262.11,
- 'Sg' : 266.12,
- 'Bh' : 264.12,
- 'Hs' : 269.13,
- 'Mt' : 268.14,
+ 'H': 1.00794,
+ 'He': 4.002602,
+ 'Li': 6.941,
+ 'Be': 9.012182,
+ 'B': 10.811,
+ 'C': 12.0107,
+ 'N': 14.0067,
+ 'O': 15.9994,
+ 'F': 18.9984032,
+ 'Ne': 20.1797,
+ 'Na': 22.989770,
+ 'Mg': 24.3050,
+ 'Al': 26.981538,
+ 'Si': 28.0855,
+ 'P': 30.973761,
+ 'S': 32.065,
+ 'Cl': 35.453,
+ 'Ar': 39.948,
+ 'K': 39.0983,
+ 'Ca': 40.078,
+ 'Sc': 44.955910,
+ 'Ti': 47.867,
+ 'V': 50.9415,
+ 'Cr': 51.9961,
+ 'Mn': 54.938049,
+ 'Fe': 55.845,
+ 'Co': 58.933200,
+ 'Ni': 58.6934,
+ 'Cu': 63.546,
+ 'Zn': 65.39,
+ 'Ga': 69.723,
+ 'Ge': 72.64,
+ 'As': 74.92160,
+ 'Se': 78.96,
+ 'Br': 79.904,
+ 'Kr': 83.80,
+ 'Rb': 85.4678,
+ 'Sr': 87.62,
+ 'Y': 88.90585,
+ 'Zr': 91.224,
+ 'Nb': 92.90638,
+ 'Mo': 95.94,
+ 'Tc': 98.0,
+ 'Ru': 101.07,
+ 'Rh': 102.90550,
+ 'Pd': 106.42,
+ 'Ag': 107.8682,
+ 'Cd': 112.411,
+ 'In': 114.818,
+ 'Sn': 118.710,
+ 'Sb': 121.760,
+ 'Te': 127.60,
+ 'I': 126.90447,
+ 'Xe': 131.293,
+ 'Cs': 132.90545,
+ 'Ba': 137.327,
+ 'La': 138.9055,
+ 'Ce': 140.116,
+ 'Pr': 140.90765,
+ 'Nd': 144.24,
+ 'Pm': 145.0,
+ 'Sm': 150.36,
+ 'Eu': 151.964,
+ 'Gd': 157.25,
+ 'Tb': 158.92534,
+ 'Dy': 162.50,
+ 'Ho': 164.93032,
+ 'Er': 167.259,
+ 'Tm': 168.93421,
+ 'Yb': 173.04,
+ 'Lu': 174.967,
+ 'Hf': 178.49,
+ 'Ta': 180.9479,
+ 'W': 183.84,
+ 'Re': 186.207,
+ 'Os': 190.23,
+ 'Ir': 192.217,
+ 'Pt': 195.078,
+ 'Au': 196.96655,
+ 'Hg': 200.59,
+ 'Tl': 204.3833,
+ 'Pb': 207.2,
+ 'Bi': 208.98038,
+ 'Po': 208.98,
+ 'At': 209.99,
+ 'Rn': 222.02,
+ 'Fr': 223.02,
+ 'Ra': 226.03,
+ 'Ac': 227.03,
+ 'Th': 232.0381,
+ 'Pa': 231.03588,
+ 'U': 238.02891,
+ 'Np': 237.05,
+ 'Pu': 244.06,
+ 'Am': 243.06,
+ 'Cm': 247.07,
+ 'Bk': 247.07,
+ 'Cf': 251.08,
+ 'Es': 252.08,
+ 'Fm': 257.10,
+ 'Md': 258.10,
+ 'No': 259.10,
+ 'Lr': 262.11,
+ 'Rf': 261.11,
+ 'Db': 262.11,
+ 'Sg': 266.12,
+ 'Bh': 264.12,
+ 'Hs': 269.13,
+ 'Mt': 268.14,
}
diff -Nru python-biopython-1.62/Bio/DocSQL.py python-biopython-1.63/Bio/DocSQL.py
--- python-biopython-1.62/Bio/DocSQL.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/DocSQL.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,8 +5,7 @@
# license. Please see the LICENSE file that should have been included
# as part of this package.
-"""
-Bio.DocSQL: easy access to DB API databases.
+"""Bio.DocSQL: easy access to DB API databases.
>>> import os
>>> import MySQLdb
@@ -24,8 +23,7 @@
CreatePeople(message=Success)
"""
-__version__ = "$Revision: 1.13 $"
-# $Source: /home/bartek/cvs2bzr/biopython_fastimport/cvs_repo/biopython/Bio/DocSQL.py,v $
+from __future__ import print_function
import sys
@@ -121,7 +119,7 @@
def dump(self):
for item in self:
- print item
+ print(item)
class QueryGeneric(Query):
@@ -137,13 +135,23 @@
self.cursor = connection.cursor()
self.row_class = query.row_class
if query.diagnostics:
- print >>sys.stderr, query.statement
- print >>sys.stderr, query.params
+ sys.stderr.write("Query statement: %s\n" % query.statement)
+ sys.stderr.write("Query params: %s\n" % query.params)
self.cursor.execute(query.statement, query.params)
- def next(self):
+ def __next__(self):
return self.row_class(self.cursor)
+ if sys.version_info[0] < 3:
+ def next(self):
+ """Deprecated Python 2 style alias for Python 3 style __next__ method."""
+ import warnings
+ from Bio import BiopythonDeprecationWarning
+ warnings.warn("Please use next(my_iterator) instead of my_iterator.next(), "
+ "the .next() method is deprecated and will be removed in a "
+ "future release of Biopython.", BiopythonDeprecationWarning)
+ return self.__next__()
+
class QuerySingle(Query, QueryRow):
ignore_warnings = 0
@@ -166,7 +174,7 @@
class QueryAll(list, Query):
def __init__(self, *args, **keywds):
Query.__init__(self, *args, **keywds)
- list.__init__(self, map(self.process_row, self.cursor().fetchall()))
+ list.__init__(self, [self.process_row(r) for r in self.cursor().fetchall()])
def process_row(self, row):
return row
@@ -195,7 +203,7 @@
def __init__(self, *args, **keywds):
try:
Create.__init__(self, *args, **keywds)
- except MySQLdb.IntegrityError, error_data:
+ except MySQLdb.IntegrityError as error_data:
self.error_message += self.MSG_INTEGRITY_ERROR % error_data[1]
try:
self.total_count
diff -Nru python-biopython-1.62/Bio/Emboss/Applications.py python-biopython-1.63/Bio/Emboss/Applications.py
--- python-biopython-1.62/Bio/Emboss/Applications.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Emboss/Applications.py 2013-12-05 14:10:43.000000000 +0000
@@ -12,6 +12,8 @@
programs.
"""
+from __future__ import print_function
+
from Bio.Application import _Option, _Switch, AbstractCommandline
@@ -38,38 +40,38 @@
def __init__(self, cmd=None, **kwargs):
assert cmd is not None
extra_parameters = [
- _Switch(["-auto","auto"],
+ _Switch(["-auto", "auto"],
"""Turn off prompts.
Automatic mode disables prompting, so we recommend you set
this argument all the time when calling an EMBOSS tool from
Biopython.
"""),
- _Switch(["-stdout","stdout"],
+ _Switch(["-stdout", "stdout"],
"Write standard output."),
- _Switch(["-filter","filter"],
+ _Switch(["-filter", "filter"],
"Read standard input, write standard output."),
- _Switch(["-options","options"],
+ _Switch(["-options", "options"],
"""Prompt for standard and additional values.
If you are calling an EMBOSS tool from within Biopython,
we DO NOT recommend using this option.
"""),
- _Switch(["-debug","debug"],
+ _Switch(["-debug", "debug"],
"Write debug output to program.dbg."),
- _Switch(["-verbose","verbose"],
+ _Switch(["-verbose", "verbose"],
"Report some/full command line options"),
- _Switch(["-help","help"],
+ _Switch(["-help", "help"],
"""Report command line options.
More information on associated and general qualifiers can
be found with -help -verbose
"""),
- _Switch(["-warning","warning"],
+ _Switch(["-warning", "warning"],
"Report warnings."),
- _Switch(["-error","error"],
+ _Switch(["-error", "error"],
"Report errors."),
- _Switch(["-die","die"],
+ _Switch(["-die", "die"],
"Report dying program messages."),
]
try:
@@ -94,7 +96,7 @@
def __init__(self, cmd=None, **kwargs):
assert cmd is not None
extra_parameters = [
- _Option(["-outfile","outfile"],
+ _Option(["-outfile", "outfile"],
"Output filename",
filename=True),
]
@@ -135,7 +137,7 @@
Traceback (most recent call last):
...
ValueError: Option name bogusparameter was not found.
- >>> print cline
+ >>> print(cline)
eprimer3 -auto -outfile=myresults.out -sequence=mysequence.fas -hybridprobe=True -psizeopt=200 -osizeopt=20 -explainflag=True
The equivalent for anyone still using an older version of EMBOSS would be:
@@ -145,62 +147,62 @@
>>> cline.oligosize=20 # Old EMBOSS, instead of osizeopt
>>> cline.productosize=200 # Old EMBOSS, instead of psizeopt
>>> cline.outfile = "myresults.out"
- >>> print cline
+ >>> print(cline)
eprimer3 -auto -outfile=myresults.out -sequence=mysequence.fas -hybridprobe=True -productosize=200 -oligosize=20 -explainflag=True
"""
def __init__(self, cmd="eprimer3", **kwargs):
self.parameters = [
- _Option(["-sequence","sequence"],
+ _Option(["-sequence", "sequence"],
"Sequence to choose primers from.",
is_required=True),
- _Option(["-task","task"],
+ _Option(["-task", "task"],
"Tell eprimer3 what task to perform."),
- _Option(["-hybridprobe","hybridprobe"],
+ _Option(["-hybridprobe", "hybridprobe"],
"Find an internal oligo to use as a hyb probe."),
- _Option(["-numreturn","numreturn"],
+ _Option(["-numreturn", "numreturn"],
"Maximum number of primer pairs to return."),
- _Option(["-includedregion","includedregion"],
+ _Option(["-includedregion", "includedregion"],
"Subregion of the sequence in which to pick primers."),
- _Option(["-target","target"],
+ _Option(["-target", "target"],
"Sequence to target for flanking primers."),
- _Option(["-excludedregion","excludedregion"],
+ _Option(["-excludedregion", "excludedregion"],
"Regions to exclude from primer picking."),
- _Option(["-forwardinput","forwardinput"],
+ _Option(["-forwardinput", "forwardinput"],
"Sequence of a forward primer to check."),
- _Option(["-reverseinput","reverseinput"],
+ _Option(["-reverseinput", "reverseinput"],
"Sequence of a reverse primer to check."),
- _Option(["-gcclamp","gcclamp"],
+ _Option(["-gcclamp", "gcclamp"],
"The required number of Gs and Cs at the 3' of each primer."),
- _Option(["-osize","osize"],
+ _Option(["-osize", "osize"],
"Optimum length of a primer oligo."),
- _Option(["-minsize","minsize"],
+ _Option(["-minsize", "minsize"],
"Minimum length of a primer oligo."),
- _Option(["-maxsize","maxsize"],
+ _Option(["-maxsize", "maxsize"],
"Maximum length of a primer oligo."),
- _Option(["-otm","otm"],
+ _Option(["-otm", "otm"],
"Optimum melting temperature for a primer oligo."),
- _Option(["-mintm","mintm"],
+ _Option(["-mintm", "mintm"],
"Minimum melting temperature for a primer oligo."),
- _Option(["-maxtm","maxtm"],
+ _Option(["-maxtm", "maxtm"],
"Maximum melting temperature for a primer oligo."),
- _Option(["-maxdifftm","maxdifftm"],
+ _Option(["-maxdifftm", "maxdifftm"],
"Maximum difference in melting temperatures between "
"forward and reverse primers."),
- _Option(["-ogcpercent","ogcpercent"],
+ _Option(["-ogcpercent", "ogcpercent"],
"Optimum GC% for a primer."),
- _Option(["-mingc","mingc"],
+ _Option(["-mingc", "mingc"],
"Minimum GC% for a primer."),
- _Option(["-maxgc","maxgc"],
+ _Option(["-maxgc", "maxgc"],
"Maximum GC% for a primer."),
- _Option(["-saltconc","saltconc"],
+ _Option(["-saltconc", "saltconc"],
"Millimolar salt concentration in the PCR."),
- _Option(["-dnaconc","dnaconc"],
+ _Option(["-dnaconc", "dnaconc"],
"Nanomolar concentration of annealing oligos in the PCR."),
- _Option(["-maxpolyx","maxpolyx"],
+ _Option(["-maxpolyx", "maxpolyx"],
"Maximum allowable mononucleotide repeat length in a primer."),
#Primer length:
- _Option(["-productosize","productosize"],
+ _Option(["-productosize", "productosize"],
"""Optimum size for the PCR product (OBSOLETE).
Option replaced in EMBOSS 6.1.0 by -psizeopt
@@ -210,7 +212,7 @@
Option added in EMBOSS 6.1.0, replacing -productosize
"""),
- _Option(["-productsizerange","productsizerange"],
+ _Option(["-productsizerange", "productsizerange"],
"""Acceptable range of length for the PCR product (OBSOLETE).
Option replaced in EMBOSS 6.1.0 by -prange
@@ -221,7 +223,7 @@
Option added in EMBOSS 6.1.0, replacing -productsizerange
"""),
#Primer temperature:
- _Option(["-productotm","productotm"],
+ _Option(["-productotm", "productotm"],
"""Optimum melting temperature for the PCR product (OBSOLETE).
Option replaced in EMBOSS 6.1.0 by -ptmopt
@@ -231,7 +233,7 @@
Option added in EMBOSS 6.1.0, replacing -productotm
"""),
- _Option(["-productmintm","productmintm"],
+ _Option(["-productmintm", "productmintm"],
"""Minimum allowed melting temperature for the amplicon (OBSOLETE)
Option replaced in EMBOSS 6.1.0 by -ptmmin
@@ -241,7 +243,7 @@
Option added in EMBOSS 6.1.0, replacing -productmintm
"""),
- _Option(["-productmaxtm","productmaxtm"],
+ _Option(["-productmaxtm", "productmaxtm"],
"""Maximum allowed melting temperature for the amplicon (OBSOLETE).
Option replaced in EMBOSS 6.1.0 by -ptmmax
@@ -262,10 +264,10 @@
Option replaced in EMBOSS 6.1.0 by -oexcluderegion.
"""),
- _Option(["-oligoinput","oligoinput"],
+ _Option(["-oligoinput", "oligoinput"],
"Sequence of the internal oligo."),
#Oligo length:
- _Option(["-oligosize","oligosize"],
+ _Option(["-oligosize", "oligosize"],
"""Optimum length of internal oligo (OBSOLETE).
Option replaced in EMBOSS 6.1.0 by -osizeopt.
@@ -275,7 +277,7 @@
Option added in EMBOSS 6.1.0, replaces -oligosize
"""),
- _Option(["-oligominsize","oligominsize"],
+ _Option(["-oligominsize", "oligominsize"],
"""Minimum length of internal oligo (OBSOLETE)."),
Option replaced in EMBOSS 6.1.0 by -ominsize.
@@ -285,7 +287,7 @@
Option added in EMBOSS 6.1.0, replaces -oligominsize
"""),
- _Option(["-oligomaxsize","oligomaxsize"],
+ _Option(["-oligomaxsize", "oligomaxsize"],
"""Maximum length of internal oligo (OBSOLETE).
Option replaced in EMBOSS 6.1.0 by -omaxsize.
@@ -296,7 +298,7 @@
Option added in EMBOSS 6.1.0, replaces -oligomaxsize
"""),
#Oligo GC temperature:
- _Option(["-oligotm","oligotm"],
+ _Option(["-oligotm", "oligotm"],
"""Optimum melting temperature of internal oligo (OBSOLETE).
Option replaced in EMBOSS 6.1.0 by -otmopt.
@@ -306,7 +308,7 @@
Option added in EMBOSS 6.1.0.
"""),
- _Option(["-oligomintm","oligomintm"],
+ _Option(["-oligomintm", "oligomintm"],
"""Minimum melting temperature of internal oligo (OBSOLETE).
Option replaced in EMBOSS 6.1.0 by -otmmin.
@@ -316,7 +318,7 @@
Option added in EMBOSS 6.1.0, replacing -oligomintm
"""),
- _Option(["-oligomaxtm","oligomaxtm"],
+ _Option(["-oligomaxtm", "oligomaxtm"],
"""Maximum melting temperature of internal oligo (OBSOLETE).
Option replaced in EMBOSS 6.1.0 by -otmmax.
@@ -327,7 +329,7 @@
Option added in EMBOSS 6.1.0, replacing -oligomaxtm
"""),
#Oligo GC percent:
- _Option(["-oligoogcpercent","oligoogcpercent"],
+ _Option(["-oligoogcpercent", "oligoogcpercent"],
"""Optimum GC% for internal oligo (OBSOLETE).
Option replaced in EMBOSS 6.1.0 by -ogcopt.
@@ -337,7 +339,7 @@
Option added in EMBOSS 6.1.0, replacing -oligoogcpercent
"""),
- _Option(["-oligomingc","oligomingc"],
+ _Option(["-oligomingc", "oligomingc"],
"""Minimum GC% for internal oligo (OBSOLETE).
Option replaced in EMBOSS 6.1.0 by -ogcmin.
@@ -347,7 +349,7 @@
Option added in EMBOSS 6.1.0, replacing -oligomingc
"""),
- _Option(["-oligomaxgc","oligomaxgc"],
+ _Option(["-oligomaxgc", "oligomaxgc"],
"""Maximum GC% for internal oligo.
Option replaced in EMBOSS 6.1.0 by -ogcmax
@@ -358,7 +360,7 @@
Option added in EMBOSS 6.1.0, replacing -oligomaxgc
"""),
#Oligo salt concentration:
- _Option(["-oligosaltconc","oligosaltconc"],
+ _Option(["-oligosaltconc", "oligosaltconc"],
"""Millimolar concentration of salt in the hybridisation."),
Option replaced in EMBOSS 6.1.0 by -osaltconc
@@ -368,7 +370,7 @@
Option added in EMBOSS 6.1.0, replacing -oligosaltconc
"""),
- _Option(["-oligodnaconc","oligodnaconc"],
+ _Option(["-oligodnaconc", "oligodnaconc"],
"""Nanomolar concentration of internal oligo in the hybridisation.
Option replaced in EMBOSS 6.1.0 by -odnaconc
@@ -379,7 +381,7 @@
Option added in EMBOSS 6.1.0, replacing -oligodnaconc
"""),
#Oligo self complementarity
- _Option(["-oligoselfany","oligoselfany"],
+ _Option(["-oligoselfany", "oligoselfany"],
"""Maximum allowable alignment score for self-complementarity (OBSOLETE).
Option replaced in EMBOSS 6.1.0 by -oanyself
@@ -389,7 +391,7 @@
Option added in EMBOSS 6.1.0, replacing -oligoselfany
"""),
- _Option(["-oligoselfend","oligoselfend"],
+ _Option(["-oligoselfend", "oligoselfend"],
"""Maximum allowable 3`-anchored global alignment score "
for self-complementarity (OBSOLETE).
@@ -400,7 +402,7 @@
Option added in EMBOSS 6.1.0, replacing -oligoselfend
"""),
- _Option(["-oligomaxpolyx","oligomaxpolyx"],
+ _Option(["-oligomaxpolyx", "oligomaxpolyx"],
"""Maximum length of mononucleotide repeat in internal oligo (OBSOLETE).
Option replaced in EMBOSS 6.1.0 by -opolyxmax
@@ -410,12 +412,12 @@
Option added in EMBOSS 6.1.0, replacing -oligomaxpolyx
"""),
- _Option(["-mispriminglibraryfile","mispriminglibraryfile"],
+ _Option(["-mispriminglibraryfile", "mispriminglibraryfile"],
"File containing library of sequences to avoid amplifying"),
- _Option(["-maxmispriming","maxmispriming"],
+ _Option(["-maxmispriming", "maxmispriming"],
"Maximum allowed similarity of primers to sequences in "
"library specified by -mispriminglibrary"),
- _Option(["-oligomaxmishyb","oligomaxmishyb"],
+ _Option(["-oligomaxmishyb", "oligomaxmishyb"],
"""Maximum alignment score for hybridisation of internal oligo to
library specified by -oligomishyblibraryfile (OBSOLETE).
@@ -438,7 +440,7 @@
Option added in EMBOSS 6.1.0, replacing -oligomishyblibraryfile
"""),
- _Option(["-explainflag","explainflag"],
+ _Option(["-explainflag", "explainflag"],
"Produce output tags with eprimer3 statistics"),
]
_EmbossCommandLine.__init__(self, cmd, **kwargs)
@@ -449,25 +451,25 @@
"""
def __init__(self, cmd="primersearch", **kwargs):
self.parameters = [
- _Option(["-seqall","-sequences","sequences","seqall"],
+ _Option(["-seqall", "-sequences", "sequences", "seqall"],
"Sequence to look for the primer pairs in.",
is_required=True),
#When this wrapper was written primersearch used -sequences
#as the argument name. Since at least EMBOSS 5.0 (and
#perhaps earlier) this has been -seqall instead.
- _Option(["-infile","-primers","primers","infile"],
+ _Option(["-infile", "-primers", "primers", "infile"],
"File containing the primer pairs to search for.",
filename=True,
is_required=True),
#When this wrapper was written primersearch used -primers
#as the argument name. Since at least EMBOSS 5.0 (and
#perhaps earlier) this has been -infile instead.
- _Option(["-mismatchpercent","mismatchpercent"],
+ _Option(["-mismatchpercent", "mismatchpercent"],
"Allowed percentage mismatch (any integer value, default 0).",
is_required=True),
- _Option(["-snucleotide","snucleotide"],
+ _Option(["-snucleotide", "snucleotide"],
"Sequences are nucleotide (boolean)"),
- _Option(["-sprotein","sprotein"],
+ _Option(["-sprotein", "sprotein"],
"Sequences are protein (boolean)"),
]
_EmbossCommandLine.__init__(self, cmd, **kwargs)
@@ -494,7 +496,7 @@
"number of rate catergories (1-9)"),
_Option(["-rate", "rate"],
"rate for each category"),
- _Option(["-categories","categories"],
+ _Option(["-categories", "categories"],
"File of substitution rate categories"),
_Option(["-weights", "weights"],
"weights file"),
@@ -554,7 +556,7 @@
"is martrix [S]quare pr [u]pper or [l]ower"),
_Option(["-treetype", "treetype"],
"nj or UPGMA tree (n/u)"),
- _Option(["-outgrno","outgrno" ],
+ _Option(["-outgrno", "outgrno" ],
"taxon to use as OG"),
_Option(["-jumble", "jumble"],
"randommise input order (Y/n)"),
@@ -714,7 +716,7 @@
"number of rate catergories (1-9)"),
_Option(["-rate", "rate"],
"rate for each category"),
- _Option(["-catergories","catergories"],
+ _Option(["-catergories", "catergories"],
"file of rates"),
_Option(["-weights", "weights"],
"weights file"),
@@ -771,34 +773,34 @@
"""
def __init__(self, cmd="water", **kwargs):
self.parameters = [
- _Option(["-asequence","asequence"],
+ _Option(["-asequence", "asequence"],
"First sequence to align",
filename=True,
is_required=True),
- _Option(["-bsequence","bsequence"],
+ _Option(["-bsequence", "bsequence"],
"Second sequence to align",
filename=True,
is_required=True),
- _Option(["-gapopen","gapopen"],
+ _Option(["-gapopen", "gapopen"],
"Gap open penalty",
is_required=True),
- _Option(["-gapextend","gapextend"],
+ _Option(["-gapextend", "gapextend"],
"Gap extension penalty",
is_required=True),
- _Option(["-datafile","datafile"],
+ _Option(["-datafile", "datafile"],
"Matrix file",
filename=True),
_Switch(["-nobrief", "nobrief"],
"Display extended identity and similarity"),
_Switch(["-brief", "brief"],
"Display brief identity and similarity"),
- _Option(["-similarity","similarity"],
+ _Option(["-similarity", "similarity"],
"Display percent identity and similarity"),
- _Option(["-snucleotide","snucleotide"],
+ _Option(["-snucleotide", "snucleotide"],
"Sequences are nucleotide (boolean)"),
- _Option(["-sprotein","sprotein"],
+ _Option(["-sprotein", "sprotein"],
"Sequences are protein (boolean)"),
- _Option(["-aformat","aformat"],
+ _Option(["-aformat", "aformat"],
"Display output in a different specified output format")]
_EmbossCommandLine.__init__(self, cmd, **kwargs)
@@ -808,21 +810,21 @@
"""
def __init__(self, cmd="needle", **kwargs):
self.parameters = [
- _Option(["-asequence","asequence"],
+ _Option(["-asequence", "asequence"],
"First sequence to align",
filename=True,
is_required=True),
- _Option(["-bsequence","bsequence"],
+ _Option(["-bsequence", "bsequence"],
"Second sequence to align",
filename=True,
is_required=True),
- _Option(["-gapopen","gapopen"],
+ _Option(["-gapopen", "gapopen"],
"Gap open penalty",
is_required=True),
- _Option(["-gapextend","gapextend"],
+ _Option(["-gapextend", "gapextend"],
"Gap extension penalty",
is_required=True),
- _Option(["-datafile","datafile"],
+ _Option(["-datafile", "datafile"],
"Matrix file",
filename=True),
_Option(["-endweight", "endweight"],
@@ -836,13 +838,13 @@
"Display extended identity and similarity"),
_Switch(["-brief", "brief"],
"Display brief identity and similarity"),
- _Option(["-similarity","similarity"],
+ _Option(["-similarity", "similarity"],
"Display percent identity and similarity"),
- _Option(["-snucleotide","snucleotide"],
+ _Option(["-snucleotide", "snucleotide"],
"Sequences are nucleotide (boolean)"),
- _Option(["-sprotein","sprotein"],
+ _Option(["-sprotein", "sprotein"],
"Sequences are protein (boolean)"),
- _Option(["-aformat","aformat"],
+ _Option(["-aformat", "aformat"],
"Display output in a different specified output format")]
_EmbossCommandLine.__init__(self, cmd, **kwargs)
@@ -852,24 +854,24 @@
"""
def __init__(self, cmd="needleall", **kwargs):
self.parameters = [
- _Option(["-asequence","asequence"],
+ _Option(["-asequence", "asequence"],
"First sequence to align",
filename=True,
is_required=True),
- _Option(["-bsequence","bsequence"],
+ _Option(["-bsequence", "bsequence"],
"Second sequence to align",
filename=True,
is_required=True),
- _Option(["-gapopen","gapopen"],
+ _Option(["-gapopen", "gapopen"],
"Gap open penalty",
is_required=True),
- _Option(["-gapextend","gapextend"],
+ _Option(["-gapextend", "gapextend"],
"Gap extension penalty",
is_required=True),
- _Option(["-datafile","datafile"],
+ _Option(["-datafile", "datafile"],
"Matrix file",
filename=True),
- _Option(["-minscore","minscore"],
+ _Option(["-minscore", "minscore"],
"Exclude alignments with scores below this threshold score."),
_Option(["-errorfile", "errorfile"],
"Error file to be written to."),
@@ -884,13 +886,13 @@
"Display extended identity and similarity"),
_Switch(["-brief", "brief"],
"Display brief identity and similarity"),
- _Option(["-similarity","similarity"],
+ _Option(["-similarity", "similarity"],
"Display percent identity and similarity"),
- _Option(["-snucleotide","snucleotide"],
+ _Option(["-snucleotide", "snucleotide"],
"Sequences are nucleotide (boolean)"),
- _Option(["-sprotein","sprotein"],
+ _Option(["-sprotein", "sprotein"],
"Sequences are protein (boolean)"),
- _Option(["-aformat","aformat"],
+ _Option(["-aformat", "aformat"],
"Display output in a different specified output format")]
_EmbossCommandLine.__init__(self, cmd, **kwargs)
@@ -900,30 +902,30 @@
"""
def __init__(self, cmd="stretcher", **kwargs):
self.parameters = [
- _Option(["-asequence","asequence"],
+ _Option(["-asequence", "asequence"],
"First sequence to align",
filename=True,
is_required=True),
- _Option(["-bsequence","bsequence"],
+ _Option(["-bsequence", "bsequence"],
"Second sequence to align",
filename=True,
is_required=True),
- _Option(["-gapopen","gapopen"],
+ _Option(["-gapopen", "gapopen"],
"Gap open penalty",
is_required=True,
checker_function=lambda value: isinstance(value, int)),
- _Option(["-gapextend","gapextend"],
+ _Option(["-gapextend", "gapextend"],
"Gap extension penalty",
is_required=True,
checker_function=lambda value: isinstance(value, int)),
- _Option(["-datafile","datafile"],
+ _Option(["-datafile", "datafile"],
"Matrix file",
filename=True),
- _Option(["-snucleotide","snucleotide"],
+ _Option(["-snucleotide", "snucleotide"],
"Sequences are nucleotide (boolean)"),
- _Option(["-sprotein","sprotein"],
+ _Option(["-sprotein", "sprotein"],
"Sequences are protein (boolean)"),
- _Option(["-aformat","aformat"],
+ _Option(["-aformat", "aformat"],
"Display output in a different specified output format")]
_EmbossCommandLine.__init__(self, cmd, **kwargs)
@@ -933,18 +935,18 @@
"""
def __init__(self, cmd="fuzznuc", **kwargs):
self.parameters = [
- _Option(["-sequence","sequence"],
+ _Option(["-sequence", "sequence"],
"Sequence database USA",
is_required=True),
- _Option(["-pattern","pattern"],
+ _Option(["-pattern", "pattern"],
"Search pattern, using standard IUPAC one-letter codes",
is_required=True),
- _Option(["-mismatch","mismatch"],
+ _Option(["-mismatch", "mismatch"],
"Number of mismatches",
is_required=True),
- _Option(["-complement","complement"],
+ _Option(["-complement", "complement"],
"Search complementary strand"),
- _Option(["-rformat","rformat"],
+ _Option(["-rformat", "rformat"],
"Specify the report format to output in.")]
_EmbossCommandLine.__init__(self, cmd, **kwargs)
@@ -954,44 +956,44 @@
"""
def __init__(self, cmd="est2genome", **kwargs):
self.parameters = [
- _Option(["-est","est"],
+ _Option(["-est", "est"],
"EST sequence(s)",
is_required=True),
- _Option(["-genome","genome"],
+ _Option(["-genome", "genome"],
"Genomic sequence",
is_required=True),
- _Option(["-match","match"],
+ _Option(["-match", "match"],
"Score for matching two bases"),
- _Option(["-mismatch","mismatch"],
+ _Option(["-mismatch", "mismatch"],
"Cost for mismatching two bases"),
- _Option(["-gappenalty","gappenalty"],
+ _Option(["-gappenalty", "gappenalty"],
"Cost for deleting a single base in either sequence, "
"excluding introns"),
- _Option(["-intronpenalty","intronpenalty"],
+ _Option(["-intronpenalty", "intronpenalty"],
"Cost for an intron, independent of length."),
- _Option(["-splicepenalty","splicepenalty"],
+ _Option(["-splicepenalty", "splicepenalty"],
"Cost for an intron, independent of length "
"and starting/ending on donor-acceptor sites"),
- _Option(["-minscore","minscore"],
+ _Option(["-minscore", "minscore"],
"Exclude alignments with scores below this threshold score."),
- _Option(["-reverse","reverse"],
+ _Option(["-reverse", "reverse"],
"Reverse the orientation of the EST sequence"),
- _Option(["-splice","splice"],
+ _Option(["-splice", "splice"],
"Use donor and acceptor splice sites."),
- _Option(["-mode","mode"],
+ _Option(["-mode", "mode"],
"This determines the comparion mode. 'both', 'forward' "
"'reverse'"),
- _Option(["-best","best"],
+ _Option(["-best", "best"],
"You can print out all comparisons instead of just the best"),
- _Option(["-space","space"],
+ _Option(["-space", "space"],
"for linear-space recursion."),
- _Option(["-shuffle","shuffle"],
+ _Option(["-shuffle", "shuffle"],
"Shuffle"),
- _Option(["-seed","seed"],
+ _Option(["-seed", "seed"],
"Random number seed"),
- _Option(["-align","align"],
+ _Option(["-align", "align"],
"Show the alignment."),
- _Option(["-width","width"],
+ _Option(["-width", "width"],
"Alignment width")
]
_EmbossCommandLine.__init__(self, cmd, **kwargs)
@@ -1002,23 +1004,23 @@
"""
def __init__(self, cmd="etandem", **kwargs):
self.parameters = [
- _Option(["-sequence","sequence"],
+ _Option(["-sequence", "sequence"],
"Sequence",
filename=True,
is_required=True),
- _Option(["-minrepeat","minrepeat"],
+ _Option(["-minrepeat", "minrepeat"],
"Minimum repeat size",
is_required=True),
- _Option(["-maxrepeat","maxrepeat"],
+ _Option(["-maxrepeat", "maxrepeat"],
"Maximum repeat size",
is_required=True),
- _Option(["-threshold","threshold"],
+ _Option(["-threshold", "threshold"],
"Threshold score"),
- _Option(["-mismatch","mismatch"],
+ _Option(["-mismatch", "mismatch"],
"Allow N as a mismatch"),
- _Option(["-uniform","uniform"],
+ _Option(["-uniform", "uniform"],
"Allow uniform consensus"),
- _Option(["-rformat","rformat"],
+ _Option(["-rformat", "rformat"],
"Output report format")]
_EmbossCommandLine.__init__(self, cmd, **kwargs)
@@ -1028,24 +1030,24 @@
"""
def __init__(self, cmd="einverted", **kwargs):
self.parameters = [
- _Option(["-sequence","sequence"],
+ _Option(["-sequence", "sequence"],
"Sequence",
filename=True,
is_required=True),
- _Option(["-gap","gap"],
+ _Option(["-gap", "gap"],
"Gap penalty",
filename=True,
is_required=True),
- _Option(["-threshold","threshold"],
+ _Option(["-threshold", "threshold"],
"Minimum score threshold",
is_required=True),
- _Option(["-match","match"],
+ _Option(["-match", "match"],
"Match score",
is_required=True),
- _Option(["-mismatch","mismatch"],
+ _Option(["-mismatch", "mismatch"],
"Mismatch score",
is_required=True),
- _Option(["-maxrepeat","maxrepeat"],
+ _Option(["-maxrepeat", "maxrepeat"],
"Maximum separation between the start and end of repeat"),
]
_EmbossCommandLine.__init__(self, cmd, **kwargs)
@@ -1056,23 +1058,23 @@
"""
def __init__(self, cmd="palindrome", **kwargs):
self.parameters = [
- _Option(["-sequence","sequence"],
+ _Option(["-sequence", "sequence"],
"Sequence",
filename=True,
is_required=True),
- _Option(["-minpallen","minpallen"],
+ _Option(["-minpallen", "minpallen"],
"Minimum palindrome length",
is_required=True),
- _Option(["-maxpallen","maxpallen"],
+ _Option(["-maxpallen", "maxpallen"],
"Maximum palindrome length",
is_required=True),
- _Option(["-gaplimit","gaplimit"],
+ _Option(["-gaplimit", "gaplimit"],
"Maximum gap between repeats",
is_required=True),
- _Option(["-nummismatches","nummismatches"],
+ _Option(["-nummismatches", "nummismatches"],
"Number of mismatches allowed",
is_required=True),
- _Option(["-overlap","overlap"],
+ _Option(["-overlap", "overlap"],
"Report overlapping matches",
is_required=True),
]
@@ -1084,19 +1086,19 @@
"""
def __init__(self, cmd="tranalign", **kwargs):
self.parameters = [
- _Option(["-asequence","asequence"],
+ _Option(["-asequence", "asequence"],
"Nucleotide sequences to be aligned.",
filename=True,
is_required=True),
- _Option(["-bsequence","bsequence"],
+ _Option(["-bsequence", "bsequence"],
"Protein sequence alignment",
filename=True,
is_required=True),
- _Option(["-outseq","outseq"],
+ _Option(["-outseq", "outseq"],
"Output sequence file.",
filename=True,
is_required=True),
- _Option(["-table","table"],
+ _Option(["-table", "table"],
"Code to use")]
_EmbossCommandLine.__init__(self, cmd, **kwargs)
@@ -1106,26 +1108,26 @@
"""
def __init__(self, cmd="diffseq", **kwargs):
self.parameters = [
- _Option(["-asequence","asequence"],
+ _Option(["-asequence", "asequence"],
"First sequence to compare",
filename=True,
is_required=True),
- _Option(["-bsequence","bsequence"],
+ _Option(["-bsequence", "bsequence"],
"Second sequence to compare",
filename=True,
is_required=True),
- _Option(["-wordsize","wordsize"],
+ _Option(["-wordsize", "wordsize"],
"Word size to use for comparisons (10 default)",
is_required=True),
- _Option(["-aoutfeat","aoutfeat"],
+ _Option(["-aoutfeat", "aoutfeat"],
"File for output of first sequence's features",
filename=True,
is_required=True),
- _Option(["-boutfeat","boutfeat"],
+ _Option(["-boutfeat", "boutfeat"],
"File for output of second sequence's features",
filename=True,
is_required=True),
- _Option(["-rformat","rformat"],
+ _Option(["-rformat", "rformat"],
"Output report file format")
]
_EmbossCommandLine.__init__(self, cmd, **kwargs)
@@ -1139,7 +1141,7 @@
>>> from Bio.Emboss.Applications import IepCommandline
>>> iep_cline = IepCommandline(sequence="proteins.faa",
... outfile="proteins.txt")
- >>> print iep_cline
+ >>> print(iep_cline)
iep -outfile=proteins.txt -sequence=proteins.faa
You would typically run the command line with iep_cline() or via the
@@ -1147,32 +1149,32 @@
"""
def __init__(self, cmd="iep", **kwargs):
self.parameters = [
- _Option(["-sequence","sequence"],
+ _Option(["-sequence", "sequence"],
"Protein sequence(s) filename",
filename=True,
is_required=True),
- _Option(["-amino","amino"],
+ _Option(["-amino", "amino"],
"""Number of N-termini
Integer 0 (default) or more.
"""),
- _Option(["-carboxyl","carboxyl"],
+ _Option(["-carboxyl", "carboxyl"],
"""Number of C-termini
Integer 0 (default) or more.
"""),
- _Option(["-lysinemodified","lysinemodified"],
+ _Option(["-lysinemodified", "lysinemodified"],
"""Number of modified lysines
Integer 0 (default) or more.
"""),
- _Option(["-disulphides","disulphides"],
+ _Option(["-disulphides", "disulphides"],
"""Number of disulphide bridges
Integer 0 (default) or more.
"""),
#Should we implement the -termini switch as well?
- _Option(["-notermini","notermini"],
+ _Option(["-notermini", "notermini"],
"Exclude (True) or include (False) charge at N and C terminus."),
]
_EmbossCommandLine.__init__(self, cmd, **kwargs)
@@ -1192,15 +1194,15 @@
"""
def __init__(self, cmd="seqret", **kwargs):
self.parameters = [
- _Option(["-sequence","sequence"],
+ _Option(["-sequence", "sequence"],
"Input sequence(s) filename",
filename=True),
- _Option(["-outseq","outseq"],
+ _Option(["-outseq", "outseq"],
"Output sequence file.",
filename=True),
- _Option(["-sformat","sformat"],
+ _Option(["-sformat", "sformat"],
"Input sequence(s) format (e.g. fasta, genbank)"),
- _Option(["-osformat","osformat"],
+ _Option(["-osformat", "osformat"],
"Output sequence(s) format (e.g. fasta, genbank)"),
]
_EmbossMinimalCommandLine.__init__(self, cmd, **kwargs)
@@ -1226,7 +1228,7 @@
>>> cline.auto = True
>>> cline.wordsize = 18
>>> cline.aformat = "pair"
- >>> print cline
+ >>> print(cline)
seqmatchall -auto -outfile=opuntia.txt -sequence=opuntia.fasta -wordsize=18 -aformat=pair
"""
@@ -1238,7 +1240,7 @@
is_required=True),
_Option(["-wordsize", "wordsize"],
"Word size (Integer 2 or more, default 4)"),
- _Option(["-aformat","aformat"],
+ _Option(["-aformat", "aformat"],
"Display output in a different specified output format"),
]
_EmbossCommandLine.__init__(self, cmd, **kwargs)
@@ -1252,3 +1254,4 @@
if __name__ == "__main__":
#Run the doctests
_test()
+
diff -Nru python-biopython-1.62/Bio/Emboss/Primer3.py python-biopython-1.63/Bio/Emboss/Primer3.py
--- python-biopython-1.62/Bio/Emboss/Primer3.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Emboss/Primer3.py 2013-12-05 14:10:43.000000000 +0000
@@ -146,7 +146,7 @@
except IndexError: # eprimer3 reports oligo without sequence
primer.internal_seq = ''
try:
- line = handle.next()
+ line = next(handle)
except StopIteration:
break
if record:
@@ -161,11 +161,11 @@
"""
iterator = parse(handle)
try:
- first = iterator.next()
+ first = next(iterator)
except StopIteration:
raise ValueError("No records found in handle")
try:
- second = iterator.next()
+ second = next(iterator)
except StopIteration:
second = None
if second is not None:
diff -Nru python-biopython-1.62/Bio/Emboss/PrimerSearch.py python-biopython-1.63/Bio/Emboss/PrimerSearch.py
--- python-biopython-1.62/Bio/Emboss/PrimerSearch.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Emboss/PrimerSearch.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Code to interact with the primersearch program from EMBOSS.
"""
diff -Nru python-biopython-1.62/Bio/Emboss/__init__.py python-biopython-1.63/Bio/Emboss/__init__.py
--- python-biopython-1.62/Bio/Emboss/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Emboss/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,2 +1,7 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Code to interact with the ever-so-useful EMBOSS programs.
"""
diff -Nru python-biopython-1.62/Bio/Entrez/Parser.py python-biopython-1.63/Bio/Entrez/Parser.py
--- python-biopython-1.62/Bio/Entrez/Parser.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Entrez/Parser.py 2013-12-05 14:10:43.000000000 +0000
@@ -36,11 +36,14 @@
import os.path
-import urlparse
-import urllib
import warnings
from xml.parsers import expat
+#Importing these functions with leading underscore as not intended for reuse
+from Bio._py3k import urlopen as _urlopen
+from Bio._py3k import urlparse as _urlparse
+from Bio._py3k import unicode
+
# The following four classes are used to add a member .attributes to integers,
# strings, lists, and dictionaries, respectively.
@@ -182,7 +185,7 @@
raise IOError("Can't parse a closed handle")
try:
self.parser.ParseFile(handle)
- except expat.ExpatError, e:
+ except expat.ExpatError as e:
if self.parser.StartElementHandler:
# We saw the initial >> records = Entrez.parse(handle)
>>> for record in records:
... # each record is a Python dictionary or list.
- ... print record['MedlineCitation']['Article']['ArticleTitle']
+ ... print(record['MedlineCitation']['Article']['ArticleTitle'])
Biopython: freely available Python tools for computational molecular biology and bioinformatics.
PDB file parser and structure class implemented in Python.
>>> handle.close()
@@ -68,12 +68,17 @@
_open Internally used function.
"""
-import urllib
-import urllib2
+from __future__ import print_function
+
import time
import warnings
import os.path
+#Importing these functions with leading underscore as not intended for reuse
+from Bio._py3k import urlopen as _urlopen
+from Bio._py3k import urlencode as _urlencode
+from Bio._py3k import HTTPError as _HTTPError
+
from Bio._py3k import _binary_to_string_handle, _as_bytes
email = None
@@ -118,7 +123,7 @@
>>> from Bio import Entrez
>>> Entrez.email = "Your.Name.Here@example.org"
>>> handle = Entrez.efetch(db="nucleotide", id="57240072", rettype="gb", retmode="text")
- >>> print handle.readline().strip()
+ >>> print(handle.readline().strip())
LOCUS AY851612 892 bp DNA linear PLN 10-APR-2007
>>> handle.close()
@@ -205,7 +210,7 @@
>>> handle = Entrez.elink(dbfrom="pubmed", id=pmid, linkname="pubmed_pubmed")
>>> record = Entrez.read(handle)
>>> handle.close()
- >>> print record[0]["LinkSetDb"][0]["LinkName"]
+ >>> print(record[0]["LinkSetDb"][0]["LinkName"])
pubmed_pubmed
>>> linked = [link["Id"] for link in record[0]["LinkSetDb"][0]["Link"]]
>>> "17121776" in linked
@@ -267,9 +272,9 @@
>>> handle = Entrez.esummary(db="journals", id="30367")
>>> record = Entrez.read(handle)
>>> handle.close()
- >>> print record[0]["Id"]
+ >>> print(record[0]["Id"])
30367
- >>> print record[0]["Title"]
+ >>> print(record[0]["Title"])
Computational biology and chemistry
"""
@@ -303,7 +308,7 @@
>>> handle.close()
>>> for row in record["eGQueryResult"]:
... if "pmc" in row["DbName"]:
- ... print row["Count"] > 60
+ ... print(row["Count"] > 60)
True
"""
@@ -330,9 +335,9 @@
>>> from Bio import Entrez
>>> Entrez.email = "Your.Name.Here@example.org"
>>> record = Entrez.read(Entrez.espell(term="biopythooon"))
- >>> print record["Query"]
+ >>> print(record["Query"])
biopythooon
- >>> print record["CorrectedQuery"]
+ >>> print(record["CorrectedQuery"])
biopython
"""
@@ -362,7 +367,7 @@
(if any) of each element in a dictionary my_element.attributes, and
the tag name in my_element.tag.
"""
- from Parser import DataHandler
+ from .Parser import DataHandler
handler = DataHandler(validate)
record = handler.read(handle)
return record
@@ -394,7 +399,7 @@
(if any) of each element in a dictionary my_element.attributes, and
the tag name in my_element.tag.
"""
- from Parser import DataHandler
+ from .Parser import DataHandler
handler = DataHandler(validate)
records = handler.parse(handle)
return records
@@ -446,17 +451,17 @@
a user at the email address provided before blocking access to the
E-utilities.""", UserWarning)
# Open a handle to Entrez.
- options = urllib.urlencode(params, doseq=True)
+ options = _urlencode(params, doseq=True)
#print cgi + "?" + options
try:
if post:
#HTTP POST
- handle = urllib2.urlopen(cgi, data=_as_bytes(options))
+ handle = _urlopen(cgi, data=_as_bytes(options))
else:
#HTTP GET
cgi += "?" + options
- handle = urllib2.urlopen(cgi)
- except urllib2.HTTPError, exception:
+ handle = _urlopen(cgi)
+ except _HTTPError as exception:
raise exception
return _binary_to_string_handle(handle)
@@ -466,10 +471,10 @@
def _test():
"""Run the module's doctests (PRIVATE)."""
- print "Running doctests..."
+ print("Running doctests...")
import doctest
doctest.testmod()
- print "Done"
+ print("Done")
if __name__ == "__main__":
_test()
diff -Nru python-biopython-1.62/Bio/ExPASy/ScanProsite.py python-biopython-1.63/Bio/ExPASy/ScanProsite.py
--- python-biopython-1.62/Bio/ExPASy/ScanProsite.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/ExPASy/ScanProsite.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,4 +1,12 @@
-import urllib
+# Copyright 2009 by Michiel de Hoon. All rights reserved.
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+
+#Importing these functions with leading underscore as not intended for reuse
+from Bio._py3k import urlopen as _urlopen
+from Bio._py3k import urlencode as _urlencode
+
from xml.sax import handler
from xml.sax.expatreader import ExpatParser
@@ -37,12 +45,12 @@
"""
parameters = {'seq': seq,
'output': output}
- for key, value in keywords.iteritems():
+ for key, value in keywords.items():
if value is not None:
parameters[key] = value
- command = urllib.urlencode(parameters)
+ command = _urlencode(parameters)
url = "%s/cgi-bin/prosite/PSScan.cgi?%s" % (mirror, command)
- handle = urllib.urlopen(url)
+ handle = _urlopen(url)
return handle
diff -Nru python-biopython-1.62/Bio/ExPASy/__init__.py python-biopython-1.63/Bio/ExPASy/__init__.py
--- python-biopython-1.62/Bio/ExPASy/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/ExPASy/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -17,7 +17,9 @@
sprot_search_de Interface to the sprot-search-de CGI script.
"""
-import urllib
+#Importing these functions with leading underscore as not intended for reuse
+from Bio._py3k import urlopen as _urlopen
+from Bio._py3k import urlencode as _urlencode
def get_prodoc_entry(id, cgi='http://www.expasy.ch/cgi-bin/get-prodoc-entry'):
@@ -31,8 +33,7 @@
'There is no PROSITE documentation entry XXX. Please try again.'
"""
# Open a handle to ExPASy.
- handle = urllib.urlopen("%s?%s" % (cgi, id))
- return handle
+ return _urlopen("%s?%s" % (cgi, id))
def get_prosite_entry(id,
@@ -46,8 +47,7 @@
containing this line:
'There is currently no PROSITE entry for XXX. Please try again.'
"""
- handle = urllib.urlopen("%s?%s" % (cgi, id))
- return handle
+ return _urlopen("%s?%s" % (cgi, id))
def get_prosite_raw(id, cgi='http://www.expasy.ch/cgi-bin/get-prosite-raw.pl'):
@@ -59,8 +59,7 @@
For a non-existing key, ExPASy returns nothing.
"""
- handle = urllib.urlopen("%s?%s" % (cgi, id))
- return handle
+ return _urlopen("%s?%s" % (cgi, id))
def get_sprot_raw(id):
@@ -69,7 +68,7 @@
For an ID of XXX, fetches http://www.uniprot.org/uniprot/XXX.txt
(as per the http://www.expasy.ch/expasy_urls.html documentation).
"""
- return urllib.urlopen("http://www.uniprot.org/uniprot/%s.txt" % id)
+ return _urlopen("http://www.uniprot.org/uniprot/%s.txt" % id)
def sprot_search_ful(text, make_wild=None, swissprot=1, trembl=None,
@@ -87,9 +86,9 @@
variables['S'] = 'on'
if trembl:
variables['T'] = 'on'
- options = urllib.urlencode(variables)
+ options = _urlencode(variables)
fullcgi = "%s?%s" % (cgi, options)
- handle = urllib.urlopen(fullcgi)
+ handle = _urlopen(fullcgi)
return handle
@@ -107,7 +106,7 @@
variables['S'] = 'on'
if trembl:
variables['T'] = 'on'
- options = urllib.urlencode(variables)
+ options = _urlencode(variables)
fullcgi = "%s?%s" % (cgi, options)
- handle = urllib.urlopen(fullcgi)
+ handle = _urlopen(fullcgi)
return handle
diff -Nru python-biopython-1.62/Bio/FSSP/FSSPTools.py python-biopython-1.63/Bio/FSSP/FSSPTools.py
--- python-biopython-1.62/Bio/FSSP/FSSPTools.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/FSSP/FSSPTools.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
from Bio import FSSP
import copy
from Bio.Align import Generic
@@ -28,11 +33,9 @@
for j in align_dict.abs(i).pos_align_dict:
# loop within a position
mult_align_dict[j] += align_dict.abs(i).pos_align_dict[j].aa
- seq_order = mult_align_dict.keys()
- seq_order.sort()
fssp_align = Generic.Alignment(Alphabet.Gapped(
Alphabet.IUPAC.extended_protein))
- for i in seq_order:
+ for i in sorted(mult_align_dict):
fssp_align.add_sequence(sum_dict[i].pdb2+sum_dict[i].chain2,
mult_align_dict[i])
# fssp_align._add_numbering_table()
@@ -65,8 +68,7 @@
attr_value = getattr(sum_dict[prot_num], filter_attribute)
if attr_value >= low_bound and attr_value <= high_bound:
new_sum_dict[prot_num] = sum_dict[prot_num]
- prot_numbers = new_sum_dict.keys()
- prot_numbers.sort()
+ prot_numbers = sorted(new_sum_dict)
for pos_num in new_align_dict.abs_res_dict:
new_align_dict.abs(pos_num).pos_align_dict = {}
for prot_num in prot_numbers:
@@ -84,8 +86,7 @@
for prot_num in sum_dict:
if sum_dict[prot_num].pdb2+sum_dict[prot_num].chain2 == cur_pdb_name:
new_sum_dict[prot_num] = sum_dict[prot_num]
- prot_numbers = new_sum_dict.keys()
- prot_numbers.sort()
+ prot_numbers = sorted(new_sum_dict)
for pos_num in new_align_dict.abs_res_dict:
new_align_dict.abs(pos_num).pos_align_dict = {}
for prot_num in prot_numbers:
diff -Nru python-biopython-1.62/Bio/FSSP/__init__.py python-biopython-1.63/Bio/FSSP/__init__.py
--- python-biopython-1.62/Bio/FSSP/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/FSSP/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Parser for FSSP files, used in a database of protein fold classifications.
This is a module to handle FSSP files. For now it parses only the header,
@@ -10,8 +15,10 @@
tuple of two instances.
mult_align: returns a Biopython alignment object
"""
+from __future__ import print_function
+
import re
-import fssp_rec
+from . import fssp_rec
from Bio.Align import Generic
from Bio import Alphabet
fff_rec = fssp_rec.fff_rec
@@ -91,7 +98,7 @@
def __init__(self, in_str):
self.raw = in_str
in_rec = in_str.strip().split()
- # print in_rec
+ # print(in_rec)
self.nr = int(in_rec[0][:-1])
self.pdb1 = in_rec[1][:4]
if len(in_rec[1]) == 4:
@@ -128,7 +135,7 @@
class FSSPAlignRec(object):
def __init__(self, in_fff_rec):
- # print in_fff_rec
+ # print(in_fff_rec)
self.abs_res_num = int(in_fff_rec[fssp_rec.align.abs_res_num])
self.pdb_res_num = in_fff_rec[fssp_rec.align.pdb_res_num].strip()
self.chain_id = in_fff_rec[fssp_rec.align.chain_id]
@@ -182,9 +189,7 @@
# Returns a sequence string
def sequence(self, num):
s = ''
- sorted_pos_nums = self.abs_res_dict.keys()
- sorted_pos_nums.sort()
- for i in sorted_pos_nums:
+ for i in sorted(self.abs_res_dict):
s += self.abs(i).pos_align_dict[num].aa
return s
@@ -192,13 +197,11 @@
mult_align_dict = {}
for j in self.abs(1).pos_align_dict:
mult_align_dict[j] = ''
- for fssp_rec in self.itervalues():
+ for fssp_rec in self.values():
for j in fssp_rec.pos_align_dict:
mult_align_dict[j] += fssp_rec.pos_align_dict[j].aa
- seq_order = mult_align_dict.keys()
- seq_order.sort()
out_str = ''
- for i in seq_order:
+ for i in sorted(mult_align_dict):
out_str += '> %d\n' % i
k = 0
for j in mult_align_dict[i]:
@@ -222,7 +225,6 @@
header = FSSPHeader()
sum_dict = FSSPSumDict()
align_dict = FSSPAlignDict()
- # fssp_handle=open(fssp_handlename)
curline = fssp_handle.readline()
while not summary_title.match(curline):
# Still in title
@@ -246,7 +248,7 @@
curline = fssp_handle.readline()
if not alignments_title.match(curline):
if equiv_title.match(curline):
- # print "Reached equiv_title"
+ # print("Reached equiv_title")
break
else:
raise ValueError('Bad FSSP file: no alignments title record found')
@@ -266,9 +268,9 @@
align_dict[key].add_align_list(align_list)
curline = fssp_handle.readline()
if not curline:
- print 'EOFEOFEOF'
+ print('EOFEOFEOF')
raise EOFError
- for i in align_dict.itervalues():
+ for i in align_dict.values():
i.pos_align_list2dict()
del i.PosAlignList
align_dict.build_resnum_list()
diff -Nru python-biopython-1.62/Bio/File.py python-biopython-1.63/Bio/File.py
--- python-biopython-1.62/Bio/File.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/File.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,5 +1,6 @@
# Copyright 1999 by Jeffrey Chang. All rights reserved.
-# Copyright 2009-2012 by Peter Cock. All rights reserved.
+# Copyright 2009-2013 by Peter Cock. All rights reserved.
+#
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
@@ -15,14 +16,16 @@
files are also defined under Bio.File but these are not intended for direct
use.
"""
-# For with statement in Python 2.5
-from __future__ import with_statement
+from __future__ import print_function
+
import codecs
import os
+import sys
import contextlib
-import StringIO
import itertools
+from Bio._py3k import basestring
+
try:
from collections import UserDict as _dict_base
except ImportError:
@@ -86,10 +89,10 @@
and index_db functions.
"""
handle = open(filename, "rb")
- import bgzf
+ from . import bgzf
try:
return bgzf.BgzfReader(mode="rb", fileobj=handle)
- except ValueError, e:
+ except ValueError as e:
assert "BGZF" in str(e)
#Not a BGZF file after all, rewind to start:
handle.seek(0)
@@ -113,12 +116,17 @@
def __iter__(self):
return self
- def next(self):
+ def __next__(self):
next = self.readline()
if not next:
raise StopIteration
return next
+ if sys.version_info[0] < 3:
+ def next(self):
+ """Python 2 style alias for Python 3 style __next__ method."""
+ return self.__next__()
+
def readlines(self, *args, **keywds):
lines = self._saved + self._handle.readlines(*args, **keywds)
self._saved = []
@@ -160,9 +168,7 @@
return line
def tell(self):
- lengths = map(len, self._saved)
- sum = reduce(lambda x, y: x+y, lengths, 0)
- return self._handle.tell() - sum
+ return self._handle.tell() - sum(len(line) for line in self._saved)
def seek(self, *args):
self._saved = []
@@ -271,7 +277,7 @@
def __str__(self):
#TODO - How best to handle the __str__ for SeqIO and SearchIO?
if self:
- return "{%r : %s(...), ...}" % (self.keys()[0], self._obj_repr)
+ return "{%r : %s(...), ...}" % (list(self.keys())[0], self._obj_repr)
else:
return "{}"
@@ -282,37 +288,34 @@
"""How many records are there?"""
return len(self._offsets)
- if hasattr(dict, "iteritems"):
- #Python 2, use iteritems but not items etc
- def values(self):
- """Would be a list of the SeqRecord objects, but not implemented.
+ def items(self):
+ """Iterate over the (key, SeqRecord) items.
- In general you can be indexing very very large files, with millions
- of sequences. Loading all these into memory at once as SeqRecord
- objects would (probably) use up all the RAM. Therefore we simply
- don't support this dictionary method.
- """
- raise NotImplementedError("Due to memory concerns, when indexing a "
- "sequence file you cannot access all the "
- "records at once.")
-
- def items(self):
- """Would be a list of the (key, SeqRecord) tuples, but not implemented.
-
- In general you can be indexing very very large files, with millions
- of sequences. Loading all these into memory at once as SeqRecord
- objects would (probably) use up all the RAM. Therefore we simply
- don't support this dictionary method.
- """
- raise NotImplementedError("Due to memory concerns, when indexing a "
- "sequence file you cannot access all the "
- "records at once.")
+ This tries to act like a Python 3 dictionary, and does not return
+ a list of (key, value) pairs due to memory concerns.
+ """
+ for key in self.__iter__():
+ yield key, self.__getitem__(key)
- def keys(self):
- """Return a list of all the keys (SeqRecord identifiers)."""
- #TODO - Stick a warning in here for large lists? Or just refuse?
- return self._offsets.keys()
+ def values(self):
+ """Iterate over the SeqRecord items.
+
+ This tries to act like a Python 3 dictionary, and does not return
+ a list of value due to memory concerns.
+ """
+ for key in self.__iter__():
+ yield self.__getitem__(key)
+
+ def keys(self):
+ """Iterate over the keys.
+
+ This tries to act like a Python 3 dictionary, and does not return
+ a list of keys due to memory concerns.
+ """
+ return self.__iter__()
+ if hasattr(dict, "iteritems"):
+ #Python 2, also define iteritems etc
def itervalues(self):
"""Iterate over the SeqRecord) items."""
for key in self.__iter__():
@@ -327,22 +330,6 @@
"""Iterate over the keys."""
return self.__iter__()
- else:
- #Python 3 - define items and values as iterators
- def items(self):
- """Iterate over the (key, SeqRecord) items."""
- for key in self.__iter__():
- yield key, self.__getitem__(key)
-
- def values(self):
- """Iterate over the SeqRecord items."""
- for key in self.__iter__():
- yield self.__getitem__(key)
-
- def keys(self):
- """Iterate over the keys."""
- return self.__iter__()
-
def __iter__(self):
"""Iterate over the keys."""
return iter(self._offsets)
@@ -487,7 +474,7 @@
if filenames and filenames != self._filenames:
con.close()
raise ValueError("Index file has different filenames")
- except _OperationalError, err:
+ except _OperationalError as err:
con.close()
raise ValueError("Not a Biopython index database? %s" % err)
#Now we have the format (from the DB if not given to us),
@@ -504,7 +491,7 @@
#Create the index
con = _sqlite.connect(index_filename)
self._con = con
- #print "Creating index"
+ #print("Creating index")
# Sqlite PRAGMA settings for speed
con.execute("PRAGMA synchronous=OFF")
con.execute("PRAGMA locking_mode=EXCLUSIVE")
@@ -537,8 +524,8 @@
batch = list(itertools.islice(offset_iter, 100))
if not batch:
break
- #print "Inserting batch of %i offsets, %s ... %s" \
- # % (len(batch), batch[0][0], batch[-1][0])
+ #print("Inserting batch of %i offsets, %s ... %s" \
+ # % (len(batch), batch[0][0], batch[-1][0]))
con.executemany(
"INSERT INTO offset_data (key,file_number,offset,length) VALUES (?,?,?,?);",
batch)
@@ -549,11 +536,11 @@
else:
random_access_proxy._handle.close()
self._length = count
- #print "About to index %i entries" % count
+ #print("About to index %i entries" % count)
try:
con.execute("CREATE UNIQUE INDEX IF NOT EXISTS "
"key_index ON offset_data(key);")
- except _IntegrityError, err:
+ except _IntegrityError as err:
self._proxies = random_access_proxies
self.close()
con.close()
@@ -562,7 +549,7 @@
con.execute("UPDATE meta_data SET value = ? WHERE key = ?;",
(count, "count"))
con.commit()
- #print "Index created"
+ #print("Index created")
self._proxies = random_access_proxies
self._max_open = max_open
self._index_filename = index_filename
diff -Nru python-biopython-1.62/Bio/GA/Crossover/General.py python-biopython-1.63/Bio/GA/Crossover/General.py
--- python-biopython-1.62/Bio/GA/Crossover/General.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/GA/Crossover/General.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""General functionality for crossover that doesn't apply.
This collects Crossover stuff that doesn't deal with any specific
diff -Nru python-biopython-1.62/Bio/GA/Crossover/GeneralPoint.py python-biopython-1.63/Bio/GA/Crossover/GeneralPoint.py
--- python-biopython-1.62/Bio/GA/Crossover/GeneralPoint.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/GA/Crossover/GeneralPoint.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""
Generalized N-Point Crossover.
@@ -31,6 +36,8 @@
# standard modules
import random
+from Bio._py3k import range
+
class GeneralPointCrossover(object):
"""Perform n-point crossover between genomes at some defined rates.
@@ -85,8 +92,8 @@
xlocs = self._generate_locs( bound[0] )
# copy new genome strings over
- tmp = self._crossover(0, new_org, (x_locs,y_locs))
- new_org[1].genome = self._crossover(1, new_org, (x_locs,y_locs))
+ tmp = self._crossover(0, new_org, (x_locs, y_locs))
+ new_org[1].genome = self._crossover(1, new_org, (x_locs, y_locs))
new_org[0].genome = tmp
return new_org
@@ -103,9 +110,9 @@
"""
results = []
for increment in range(self._npoints):
- x = random.randint(1,bound-1)
+ x = random.randint(1, bound-1)
while (x in results): # uniqueness
- x = random.randint(1,bound-1)
+ x = random.randint(1, bound-1)
results.append( x )
results.sort() # sorted
return [0]+results+[bound] # [0, +n points+, bound]
@@ -125,7 +132,7 @@
return type: sequence (to replace no[x])
"""
s = no[ x ].genome[ :locs[ x ][1] ]
- for n in range(1,self._npoints):
+ for n in range(1, self._npoints):
# flipflop between genome_0 and genome_1
mode = (x+n)%2
# _generate_locs gives us [0, +n points+, bound]
@@ -149,7 +156,7 @@
See GeneralPoint._generate_locs documentation for details
"""
- return [0, random.randint(1,bound-1), bound]
+ return [0, random.randint(1, bound-1), bound]
def _crossover( self, x, no, locs ):
"""Replacement crossover
@@ -166,14 +173,14 @@
Interleaving: AbCdEfG, aBcDeFg
"""
def __init__(self,crossover_prob=0.1):
- GeneralPointCrossover.__init__(self,0,crossover_prob)
+ GeneralPointCrossover.__init__(self, 0, crossover_prob)
- def _generate_locs(self,bound):
- return range(-1,bound+1)
+ def _generate_locs(self, bound):
+ return list(range(-1, bound+1))
def _crossover( self, x, no, locs ):
s = no[ x ].genome[ 0:1 ]
- for n in range(1,self._npoints+2):
+ for n in range(1, self._npoints+2):
mode = ( x+n )%2
s += no[ mode ].genome[ n:n+1 ]
return s+no[mode].genome[self._npoints+3:]
diff -Nru python-biopython-1.62/Bio/GA/Crossover/Point.py python-biopython-1.63/Bio/GA/Crossover/Point.py
--- python-biopython-1.62/Bio/GA/Crossover/Point.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/GA/Crossover/Point.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Perform two-point crossovers between the genomes of two organisms.
This module performs single-point crossover between two genomes.
@@ -11,7 +16,7 @@
"""
# standard modules
-from GeneralPoint import TwoCrossover
+from .GeneralPoint import TwoCrossover
class SinglePointCrossover(TwoCrossover):
diff -Nru python-biopython-1.62/Bio/GA/Crossover/TwoPoint.py python-biopython-1.63/Bio/GA/Crossover/TwoPoint.py
--- python-biopython-1.62/Bio/GA/Crossover/TwoPoint.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/GA/Crossover/TwoPoint.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Perform two-point crossovers between the genomes of two organisms.
This module performs two-point crossover between two genomes.
@@ -16,7 +21,7 @@
"""
# standard modules
-from GeneralPoint import TwoCrossover
+from .GeneralPoint import TwoCrossover
class TwoPointCrossover(TwoCrossover):
diff -Nru python-biopython-1.62/Bio/GA/Crossover/Uniform.py python-biopython-1.63/Bio/GA/Crossover/Uniform.py
--- python-biopython-1.62/Bio/GA/Crossover/Uniform.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/GA/Crossover/Uniform.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Perform uniform crossovers between the genomes of two organisms.
@@ -40,7 +45,7 @@
# determine if we have a crossover
crossover_chance = random.random()
if crossover_chance <= self._crossover_prob:
- minlen = min(len(new_org_1.genome),len(new_org_2.genome))
+ minlen = min(len(new_org_1.genome), len(new_org_2.genome))
for i in range( minlen ):
uniform_chance = random.random()
if uniform_chance <= self._uniform_prob:
diff -Nru python-biopython-1.62/Bio/GA/Evolver.py python-biopython-1.63/Bio/GA/Evolver.py
--- python-biopython-1.62/Bio/GA/Evolver.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/GA/Evolver.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,9 +1,16 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Evolution Strategies for a Population.
Evolver classes manage a population of individuals, and are responsible
for taking care of the transition from one generation to the next.
"""
# standard modules
+from __future__ import print_function
+
import sys
@@ -67,7 +74,7 @@
# sort the population so we can look at duplicates
self._population.sort()
for org in self._population:
- print org
+ print(org)
sys.exit()
return self._population
diff -Nru python-biopython-1.62/Bio/GA/Mutation/General.py python-biopython-1.63/Bio/GA/Mutation/General.py
--- python-biopython-1.62/Bio/GA/Mutation/General.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/GA/Mutation/General.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""General functionality for mutations.
"""
# standard library
diff -Nru python-biopython-1.62/Bio/GA/Mutation/Simple.py python-biopython-1.63/Bio/GA/Mutation/Simple.py
--- python-biopython-1.62/Bio/GA/Mutation/Simple.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/GA/Mutation/Simple.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,8 +1,15 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Perform Simple mutations on an organism's genome.
"""
# standard modules
import random
+from Bio._py3k import range
+
class SinglePositionMutation(object):
"""Perform a conversion mutation, but only at a single point in the genome.
@@ -36,7 +43,7 @@
if mutation_chance <= self._mutation_rate:
# pick a gene position to mutate at
mutation_pos = \
- self._pos_rand.choice(range(len(mutated_org.genome)))
+ self._pos_rand.choice(list(range(len(mutated_org.genome))))
# get a new letter to replace the position at
new_letter = self._switch_rand.choice(gene_choices)
diff -Nru python-biopython-1.62/Bio/GA/Organism.py python-biopython-1.63/Bio/GA/Organism.py
--- python-biopython-1.62/Bio/GA/Organism.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/GA/Organism.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Deal with an Organism in a Genetic Algorithm population.
"""
# standard modules
diff -Nru python-biopython-1.62/Bio/GA/Repair/Stabilizing.py python-biopython-1.63/Bio/GA/Repair/Stabilizing.py
--- python-biopython-1.62/Bio/GA/Repair/Stabilizing.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/GA/Repair/Stabilizing.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Methods for performing repairs that will Stabilize genomes.
These methods perform repair to keep chromosomes from drifting too far in
@@ -42,7 +47,7 @@
new_org = organism.copy()
# start getting rid of ambiguous items
- while 1:
+ while True:
# first find all of the ambigous items
seq_genome = new_org.genome.toseq()
all_ambiguous = self._ambig_finder.find_ambiguous(str(seq_genome))
diff -Nru python-biopython-1.62/Bio/GA/Selection/Abstract.py python-biopython-1.63/Bio/GA/Selection/Abstract.py
--- python-biopython-1.62/Bio/GA/Selection/Abstract.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/GA/Selection/Abstract.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Base selection class from which all Selectors should derive.
"""
diff -Nru python-biopython-1.62/Bio/GA/Selection/Diversity.py python-biopython-1.63/Bio/GA/Selection/Diversity.py
--- python-biopython-1.62/Bio/GA/Selection/Diversity.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/GA/Selection/Diversity.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Select individuals into a new population trying to maintain diversity.
This selection mechanism seeks to try and get both high fitness levels
@@ -11,8 +16,8 @@
from Bio.Seq import MutableSeq
# local modules
-from Abstract import AbstractSelection
-from Tournament import TournamentSelection
+from .Abstract import AbstractSelection
+from .Tournament import TournamentSelection
class DiversitySelection(AbstractSelection):
diff -Nru python-biopython-1.62/Bio/GA/Selection/RouletteWheel.py python-biopython-1.63/Bio/GA/Selection/RouletteWheel.py
--- python-biopython-1.62/Bio/GA/Selection/RouletteWheel.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/GA/Selection/RouletteWheel.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Implement Roulette Wheel selection on a population.
This implements Roulette Wheel selection in which individuals are
@@ -9,7 +14,7 @@
import copy
# local modules
-from Abstract import AbstractSelection
+from .Abstract import AbstractSelection
class RouletteWheelSelection(AbstractSelection):
@@ -49,8 +54,7 @@
# set up the current probabilities for selecting organisms
# from the population
prob_wheel = self._set_up_wheel(population)
- probs = prob_wheel.keys()
- probs.sort()
+ probs = sorted(prob_wheel)
# now create the new population with the same size as the original
new_population = []
diff -Nru python-biopython-1.62/Bio/GA/Selection/Tournament.py python-biopython-1.63/Bio/GA/Selection/Tournament.py
--- python-biopython-1.62/Bio/GA/Selection/Tournament.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/GA/Selection/Tournament.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Provide Tournament style selection.
This implements selection based on a tournament style. In this model of
@@ -9,7 +14,7 @@
import random
# local modules
-from Abstract import AbstractSelection
+from .Abstract import AbstractSelection
class TournamentSelection(AbstractSelection):
diff -Nru python-biopython-1.62/Bio/GenBank/Record.py python-biopython-1.63/Bio/GenBank/Record.py
--- python-biopython-1.62/Bio/GenBank/Record.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/GenBank/Record.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Hold GenBank data in a straightforward format.
classes:
diff -Nru python-biopython-1.62/Bio/GenBank/Scanner.py python-biopython-1.63/Bio/GenBank/Scanner.py
--- python-biopython-1.62/Bio/GenBank/Scanner.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/GenBank/Scanner.py 2013-12-05 14:10:43.000000000 +0000
@@ -26,6 +26,8 @@
# for more details of this format, and an example.
# Added by Ying Huang & Iddo Friedberg
+from __future__ import print_function
+
import warnings
import re
from Bio.Seq import Seq
@@ -78,23 +80,23 @@
line = self.handle.readline()
if not line:
if self.debug:
- print "End of file"
+ print("End of file")
return None
if line[:self.HEADER_WIDTH] == self.RECORD_START:
if self.debug > 1:
- print "Found the start of a record:\n" + line
+ print("Found the start of a record:\n" + line)
break
line = line.rstrip()
if line == "//":
if self.debug > 1:
- print "Skipping // marking end of last record"
+ print("Skipping // marking end of last record")
elif line == "":
if self.debug > 1:
- print "Skipping blank line before record"
+ print("Skipping blank line before record")
else:
#Ignore any header before the first ID/LOCUS line.
if self.debug > 1:
- print "Skipping header line before record:\n" + line
+ print("Skipping header line before record:\n" + line)
self.line = line
return line
@@ -116,14 +118,14 @@
line = line.rstrip()
if line in self.FEATURE_START_MARKERS:
if self.debug:
- print "Found header table"
+ print("Found header table")
break
#if line[:self.HEADER_WIDTH]==self.FEATURE_START_MARKER[:self.HEADER_WIDTH]:
- # if self.debug : print "Found header table (?)"
+ # if self.debug : print("Found header table (?)")
# break
if line[:self.HEADER_WIDTH].rstrip() in self.SEQUENCE_HEADERS:
if self.debug:
- print "Found start of sequence"
+ print("Found start of sequence")
break
if line == "//":
raise ValueError("Premature end of sequence data marker '//' found")
@@ -143,7 +145,7 @@
"""
if self.line.rstrip() not in self.FEATURE_START_MARKERS:
if self.debug:
- print "Didn't find any feature table"
+ print("Didn't find any feature table")
return []
while self.line.rstrip() in self.FEATURE_START_MARKERS:
@@ -156,14 +158,14 @@
raise ValueError("Premature end of line during features table")
if line[:self.HEADER_WIDTH].rstrip() in self.SEQUENCE_HEADERS:
if self.debug:
- print "Found start of sequence"
+ print("Found start of sequence")
break
line = line.rstrip()
if line == "//":
raise ValueError("Premature end of features table, marker '//' found")
if line in self.FEATURE_END_MARKERS:
if self.debug:
- print "Found end of features"
+ print("Found end of features")
line = self.handle.readline()
break
if line[2:self.FEATURE_QUALIFIER_INDENT].strip() == "":
@@ -260,14 +262,14 @@
Note that no whitespace is removed.
"""
#Skip any blank lines
- iterator = iter(filter(None, lines))
+ iterator = (x for x in lines if x)
try:
- line = iterator.next()
+ line = next(iterator)
feature_location = line.strip()
while feature_location[-1:] == ",":
#Multiline location, still more to come!
- line = iterator.next()
+ line = next(iterator)
feature_location += line.strip()
qualifiers = []
@@ -291,26 +293,26 @@
elif value == '"':
#One single quote
if self.debug:
- print "Single quote %s:%s" % (key, value)
+ print("Single quote %s:%s" % (key, value))
#DO NOT remove the quote...
qualifiers.append((key, value))
elif value[0] == '"':
#Quoted...
value_list = [value]
while value_list[-1][-1] != '"':
- value_list.append(iterator.next())
+ value_list.append(next(iterator))
value = '\n'.join(value_list)
#DO NOT remove the quotes...
qualifiers.append((key, value))
else:
#Unquoted
- #if debug : print "Unquoted line %s:%s" % (key,value)
+ #if debug : print("Unquoted line %s:%s" % (key,value))
qualifiers.append((key, value))
else:
#Unquoted continuation
assert len(qualifiers) > 0
assert key == qualifiers[-1][0]
- #if debug : print "Unquoted Cont %s:%s" % (key, line)
+ #if debug : print("Unquoted Cont %s:%s" % (key, line))
if qualifiers[-1][1] is None:
raise StopIteration
qualifiers[-1] = (key, qualifiers[-1][1] + "\n" + line)
@@ -607,11 +609,30 @@
#Looks like the semi colon separated style introduced in 2006
self._feed_first_line_new(consumer, line)
elif line[self.HEADER_WIDTH:].count(";") == 3:
- #Looks like the pre 2006 style
- self._feed_first_line_old(consumer, line)
+ if line.rstrip().endswith(" SQ"):
+ #EMBL-bank patent data
+ self._feed_first_line_patents(consumer,line)
+ else:
+ #Looks like the pre 2006 style
+ self._feed_first_line_old(consumer, line)
else:
raise ValueError('Did not recognise the ID line layout:\n' + line)
+ def _feed_first_line_patents(self, consumer, line):
+ #Either Non-Redundant Level 1 database records,
+ #ID ; ; ;
+ #e.g. ID NRP_AX000635; PRT; NR1; 15 SQ
+ #
+ #Or, Non-Redundant Level 2 database records:
+ #ID ; ; ;
+ #e.g. ID NRP0000016E; PRT; NR2; 5 SQ
+ fields = line[self.HEADER_WIDTH:].rstrip()[:-3].split(";")
+ assert len(fields) == 4
+ consumer.locus(fields[0])
+ consumer.residue_type(fields[1])
+ consumer.data_file_division(fields[2])
+ #TODO - Record cluster size?
+
def _feed_first_line_old(self, consumer, line):
#Expects an ID line in the style before 2006, e.g.
#ID SC10H5 standard; DNA; PRO; 4870 BP.
@@ -679,7 +700,7 @@
def _feed_seq_length(self, consumer, text):
length_parts = text.split()
assert len(length_parts) == 2, "Invalid sequence length string %r" % text
- assert length_parts[1].upper() in ["BP", "BP.", "AA."]
+ assert length_parts[1].upper() in ["BP", "BP.", "AA", "AA."]
consumer.size(length_parts[0])
def _feed_header_lines(self, consumer, lines):
@@ -785,7 +806,7 @@
getattr(consumer, consumer_dict[line_type])(data)
else:
if self.debug:
- print "Ignoring EMBL header line:\n%s" % line
+ print("Ignoring EMBL header line:\n%s" % line)
def _feed_misc_lines(self, consumer, lines):
#TODO - Should we do something with the information on the SQ line(s)?
@@ -797,7 +818,7 @@
line = line[5:].strip()
contig_location = line
while True:
- line = line_iter.next()
+ line = next(line_iter)
if not line:
break
elif line.startswith("CO "):
@@ -806,6 +827,14 @@
else:
raise ValueError('Expected CO (contig) continuation line, got:\n' + line)
consumer.contig_location(contig_location)
+ if line.startswith("SQ Sequence "):
+ #e.g.
+ #SQ Sequence 219 BP; 82 A; 48 C; 33 G; 45 T; 11 other;
+ #
+ #Or, EMBL-bank patent, e.g.
+ #SQ Sequence 465 AA; 3963407aa91d3a0d622fec679a4524e0; MD5;
+ self._feed_seq_length(consumer, line[14:].rstrip().rstrip(";").split(";", 1)[0])
+ #TODO - Record the checksum etc?
return
except StopIteration:
raise ValueError("Problem in misc lines before sequence")
@@ -839,7 +868,7 @@
"""
if self.line.rstrip() not in self.FEATURE_START_MARKERS:
if self.debug:
- print "Didn't find any feature table"
+ print("Didn't find any feature table")
return []
while self.line.rstrip() in self.FEATURE_START_MARKERS:
@@ -854,14 +883,14 @@
raise ValueError("Premature end of line during features table")
if line[:self.HEADER_WIDTH].rstrip() in self.SEQUENCE_HEADERS:
if self.debug:
- print "Found start of sequence"
+ print("Found start of sequence")
break
line = line.rstrip()
if line == "//":
raise ValueError("Premature end of features table, marker '//' found")
if line in self.FEATURE_END_MARKERS:
if self.debug:
- print "Found end of features"
+ print("Found end of features")
line = self.handle.readline()
break
if line[2:self.FEATURE_QUALIFIER_INDENT].strip() == "":
@@ -1227,11 +1256,11 @@
#VERSION (version and gi)
#REFERENCE (eference_num and reference_bases)
#ORGANISM (organism and taxonomy)
- lines = filter(None, lines)
+ lines = [_f for _f in lines if _f]
lines.append("") # helps avoid getting StopIteration all the time
line_iter = iter(lines)
try:
- line = line_iter.next()
+ line = next(line_iter)
while True:
if not line:
break
@@ -1248,14 +1277,14 @@
consumer.version(data)
else:
if self.debug:
- print "Version [" + data.split(' GI:')[0] + "], gi [" + data.split(' GI:')[1] + "]"
+ print("Version [" + data.split(' GI:')[0] + "], gi [" + data.split(' GI:')[1] + "]")
consumer.version(data.split(' GI:')[0])
consumer.gi(data.split(' GI:')[1])
#Read in the next line!
- line = line_iter.next()
+ line = next(line_iter)
elif line_type == 'REFERENCE':
if self.debug > 1:
- print "Found reference [" + data + "]"
+ print("Found reference [" + data + "]")
#Need to call consumer.reference_num() and consumer.reference_bases()
#e.g.
# REFERENCE 1 (bases 1 to 86436)
@@ -1270,12 +1299,12 @@
#Read in the next line, and see if its more of the reference:
while True:
- line = line_iter.next()
+ line = next(line_iter)
if line[:GENBANK_INDENT] == GENBANK_SPACER:
#Add this continuation to the data string
data += " " + line[GENBANK_INDENT:]
if self.debug > 1:
- print "Extended reference text [" + data + "]"
+ print("Extended reference text [" + data + "]")
else:
#End of the reference, leave this text in the variable "line"
break
@@ -1286,11 +1315,11 @@
data = data.replace(' ', ' ')
if ' ' not in data:
if self.debug > 2:
- print 'Reference number \"' + data + '\"'
+ print('Reference number \"' + data + '\"')
consumer.reference_num(data)
else:
if self.debug > 2:
- print 'Reference number \"' + data[:data.find(' ')] + '\", \"' + data[data.find(' ') + 1:] + '\"'
+ print('Reference number \"' + data[:data.find(' ')] + '\", \"' + data[data.find(' ') + 1:] + '\"')
consumer.reference_num(data[:data.find(' ')])
consumer.reference_bases(data[data.find(' ') + 1:])
elif line_type == 'ORGANISM':
@@ -1305,7 +1334,7 @@
organism_data = data
lineage_data = ""
while True:
- line = line_iter.next()
+ line = next(line_iter)
if line[0:GENBANK_INDENT] == GENBANK_SPACER:
if lineage_data or ";" in line:
lineage_data += " " + line[GENBANK_INDENT:]
@@ -1316,23 +1345,23 @@
break
consumer.organism(organism_data)
if lineage_data.strip() == "" and self.debug > 1:
- print "Taxonomy line(s) missing or blank"
+ print("Taxonomy line(s) missing or blank")
consumer.taxonomy(lineage_data.strip())
del organism_data, lineage_data
elif line_type == 'COMMENT':
if self.debug > 1:
- print "Found comment"
+ print("Found comment")
#This can be multiline, and should call consumer.comment() once
#with a list where each entry is a line.
comment_list = []
comment_list.append(data)
while True:
- line = line_iter.next()
+ line = next(line_iter)
if line[0:GENBANK_INDENT] == GENBANK_SPACER:
data = line[GENBANK_INDENT:]
comment_list.append(data)
if self.debug > 2:
- print "Comment continuation [" + data + "]"
+ print("Comment continuation [" + data + "]")
else:
#End of the comment
break
@@ -1342,7 +1371,7 @@
#Its a semi-automatic entry!
#Now, this may be a multi line entry...
while True:
- line = line_iter.next()
+ line = next(line_iter)
if line[0:GENBANK_INDENT] == GENBANK_SPACER:
data += ' ' + line[GENBANK_INDENT:]
else:
@@ -1352,9 +1381,9 @@
break
else:
if self.debug:
- print "Ignoring GenBank header line:\n" % line
+ print("Ignoring GenBank header line:\n" % line)
#Read in next line
- line = line_iter.next()
+ line = next(line_iter)
except StopIteration:
raise ValueError("Problem in header")
@@ -1370,13 +1399,13 @@
line = line[10:].strip()
if line:
if self.debug:
- print "base_count = " + line
+ print("base_count = " + line)
consumer.base_count(line)
if line.startswith('ORIGIN'):
line = line[6:].strip()
if line:
if self.debug:
- print "origin_name = " + line
+ print("origin_name = " + line)
consumer.origin_name(line)
if line.startswith('WGS '):
line = line[3:].strip()
@@ -1388,7 +1417,7 @@
line = line[6:].strip()
contig_location = line
while True:
- line = line_iter.next()
+ line = next(line_iter)
if not line:
break
elif line[:GENBANK_INDENT] == GENBANK_SPACER:
@@ -1408,7 +1437,7 @@
raise ValueError("Problem in misc lines before sequence")
if __name__ == "__main__":
- from StringIO import StringIO
+ from Bio._py3k import StringIO
gbk_example = \
"""LOCUS SCU49845 5028 bp DNA PLN 21-JUN-1999
@@ -1724,58 +1753,58 @@
//
"""
- print "GenBank CDS Iteration"
- print "====================="
+ print("GenBank CDS Iteration")
+ print("=====================")
g = GenBankScanner()
for record in g.parse_cds_features(StringIO(gbk_example)):
- print record
+ print(record)
g = GenBankScanner()
for record in g.parse_cds_features(StringIO(gbk_example2),
tags2id=('gene', 'locus_tag', 'product')):
- print record
+ print(record)
g = GenBankScanner()
for record in g.parse_cds_features(StringIO(gbk_example + "\n" + gbk_example2),
tags2id=('gene', 'locus_tag', 'product')):
- print record
+ print(record)
- print
- print "GenBank Iteration"
- print "================="
+ print("")
+ print("GenBank Iteration")
+ print("=================")
g = GenBankScanner()
for record in g.parse_records(StringIO(gbk_example), do_features=False):
- print record.id, record.name, record.description
- print record.seq
+ print("%s %s %s" % (record.id, record.name, record.description))
+ print(record.seq)
g = GenBankScanner()
for record in g.parse_records(StringIO(gbk_example), do_features=True):
- print record.id, record.name, record.description
- print record.seq
+ print("%s %s %s" % (record.id, record.name, record.description))
+ print(record.seq)
g = GenBankScanner()
for record in g.parse_records(StringIO(gbk_example2), do_features=False):
- print record.id, record.name, record.description
- print record.seq
+ print("%s %s %s" % (record.id, record.name, record.description))
+ print(record.seq)
g = GenBankScanner()
for record in g.parse_records(StringIO(gbk_example2), do_features=True):
- print record.id, record.name, record.description
- print record.seq
+ print("%s %s %s" % (record.id, record.name, record.description))
+ print(record.seq)
- print
- print "EMBL CDS Iteration"
- print "=================="
+ print("")
+ print("EMBL CDS Iteration")
+ print("==================")
e = EmblScanner()
for record in e.parse_cds_features(StringIO(embl_example)):
- print record
+ print(record)
- print
- print "EMBL Iteration"
- print "=============="
+ print("")
+ print("EMBL Iteration")
+ print("==============")
e = EmblScanner()
for record in e.parse_records(StringIO(embl_example), do_features=True):
- print record.id, record.name, record.description
- print record.seq
+ print("%s %s %s" % (record.id, record.name, record.description))
+ print(record.seq)
diff -Nru python-biopython-1.62/Bio/GenBank/__init__.py python-biopython-1.63/Bio/GenBank/__init__.py
--- python-biopython-1.62/Bio/GenBank/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/GenBank/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,5 +1,6 @@
# Copyright 2000 by Jeffrey Chang, Brad Chapman. All rights reserved.
-# Copyright 2006-2011 by Peter Cock. All rights reserved.
+# Copyright 2006-2013 by Peter Cock. All rights reserved.
+#
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
@@ -38,14 +39,17 @@
location parser.
"""
+from __future__ import print_function
+
import re
+import sys # for checking if Python 2
# other Biopython stuff
from Bio import SeqFeature
# other Bio.GenBank stuff
-from utils import FeatureValueCleaner
-from Scanner import GenBankScanner
+from .utils import FeatureValueCleaner
+from .Scanner import GenBankScanner
#Constants used to parse GenBank header lines
GENBANK_INDENT = 12
@@ -65,7 +69,7 @@
_within_position = r"\(\d+\.\d+\)"
_re_within_position = re.compile(_within_position)
_within_location = r"([<>]?\d+|%s)\.\.([<>]?\d+|%s)" \
- % (_within_position,_within_position)
+ % (_within_position, _within_position)
assert _re_within_position.match("(3.9)")
assert re.compile(_within_location).match("(3.9)..10")
assert re.compile(_within_location).match("26..(30.33)")
@@ -74,7 +78,7 @@
_oneof_position = r"one\-of\(\d+(,\d+)+\)"
_re_oneof_position = re.compile(_oneof_position)
_oneof_location = r"([<>]?\d+|%s)\.\.([<>]?\d+|%s)" \
- % (_oneof_position,_oneof_position)
+ % (_oneof_position, _oneof_position)
assert _re_oneof_position.match("one-of(6,9)")
assert re.compile(_oneof_location).match("one-of(6,9)..101")
assert re.compile(_oneof_location).match("one-of(6,9)..one-of(101,104)")
@@ -156,7 +160,7 @@
>>> p = _pos("<5")
>>> p
BeforePosition(5)
- >>> print p
+ >>> print(p)
<5
>>> int(p)
5
@@ -169,7 +173,7 @@
>>> p = _pos("one-of(5,8,11)")
>>> p
OneOfPosition(11, choices=[ExactPosition(5), ExactPosition(8), ExactPosition(11)])
- >>> print p
+ >>> print(p)
one-of(5,8,11)
>>> int(p)
11
@@ -182,7 +186,7 @@
>>> p = _pos("<5", -1)
>>> p
BeforePosition(4)
- >>> print p
+ >>> print(p)
<4
>>> int(p)
4
@@ -203,7 +207,7 @@
elif pos_str.startswith(">"):
return SeqFeature.AfterPosition(int(pos_str[1:])+offset)
elif _re_within_position.match(pos_str):
- s,e = pos_str[1:-1].split(".")
+ s, e = pos_str[1:-1].split(".")
s = int(s) + offset
e = int(e) + offset
if offset == -1:
@@ -284,7 +288,7 @@
#e.g. "123"
s = loc_str
e = loc_str
- return SeqFeature.FeatureLocation(_pos(s,-1), _pos(e), strand)
+ return SeqFeature.FeatureLocation(_pos(s, -1), _pos(e), strand)
def _split_compound_loc(compound_loc):
@@ -367,7 +371,7 @@
self.handle = handle
self._parser = parser
- def next(self):
+ def __next__(self):
"""Return the next GenBank record from the handle.
Will return None if we ran out of records.
@@ -387,8 +391,18 @@
except StopIteration:
return None
+ if sys.version_info[0] < 3:
+ def next(self):
+ """Deprecated Python 2 style alias for Python 3 style __next__ method."""
+ import warnings
+ from Bio import BiopythonDeprecationWarning
+ warnings.warn("Please use next(my_iterator) instead of my_iterator.next(), "
+ "the .next() method is deprecated and will be removed in a "
+ "future release of Biopython.", BiopythonDeprecationWarning)
+ return self.__next__()
+
def __iter__(self):
- return iter(self.next, None)
+ return iter(self.__next__, None)
class ParserFailureError(Exception):
@@ -509,7 +523,7 @@
"""
# first replace all line feeds with spaces
# Also, EMBL style accessions are split with ';'
- accession = accession_string.replace("\n", " ").replace(";"," ")
+ accession = accession_string.replace("\n", " ").replace(";", " ")
return [x.strip() for x in accession.split() if x.strip()]
@@ -560,9 +574,7 @@
"""Replace multiple spaces in the passed text with single spaces.
"""
# get rid of excessive spaces
- text_parts = text.split(" ")
- text_parts = filter(None, text_parts)
- return ' '.join(text_parts)
+ return ' '.join(x for x in text.split(" ") if x)
def _remove_spaces(self, text):
"""Remove all spaces from the passed text.
@@ -672,7 +684,7 @@
self.data.annotations['wgs'] = content.split('-')
def add_wgs_scafld(self, content):
- self.data.annotations.setdefault('wgs_scafld',[]).append(content.split('-'))
+ self.data.annotations.setdefault('wgs_scafld', []).append(content.split('-'))
def nid(self, content):
self.data.annotations['nid'] = content
@@ -708,7 +720,7 @@
"Project:28471" as part of this transition.
"""
content = content.replace("GenomeProject:", "Project:")
- self.data.dbxrefs.extend([p for p in content.split() if p])
+ self.data.dbxrefs.extend(p for p in content.split() if p)
def dblink(self, content):
"""Store DBLINK cross references as dbxrefs in our record object.
@@ -1041,9 +1053,9 @@
ref = None
try:
loc = _loc(part, self._expected_size, part_strand)
- except ValueError, err:
- print location_line
- print part
+ except ValueError as err:
+ print(location_line)
+ print(part)
raise err
f = SeqFeature.SeqFeature(location=loc, ref=ref,
location_operator=cur_feature.location_operator,
@@ -1202,7 +1214,8 @@
seq_alphabet = IUPAC.ambiguous_dna
else:
seq_alphabet = IUPAC.ambiguous_rna
- elif 'PROTEIN' in self._seq_type.upper():
+ elif 'PROTEIN' in self._seq_type.upper() \
+ or self._seq_type == "PRT": # PRT is used in EMBL-bank for patents
seq_alphabet = IUPAC.protein # or extended protein?
# work around ugly GenBank records which have circular or
# linear but no indication of sequence type
@@ -1224,7 +1237,7 @@
"""
def __init__(self):
_BaseGenBankConsumer.__init__(self)
- import Record
+ from . import Record
self.data = Record.Record()
self._seq_data = []
@@ -1286,7 +1299,7 @@
self.data.keywords = self._split_keywords(content)
def project(self, content):
- self.data.projects.extend([p for p in content.split() if p])
+ self.data.projects.extend(p for p in content.split() if p)
def dblink(self, content):
self.data.dblinks.append(content)
@@ -1310,7 +1323,7 @@
if self._cur_reference is not None:
self.data.references.append(self._cur_reference)
- import Record
+ from . import Record
self._cur_reference = Record.Reference()
self._cur_reference.number = content
@@ -1347,11 +1360,11 @@
def comment(self, content):
self.data.comment += "\n".join(content)
- def primary_ref_line(self,content):
+ def primary_ref_line(self, content):
"""Data for the PRIMARY line"""
self.data.primary.append(content)
- def primary(self,content):
+ def primary(self, content):
pass
def features_line(self, content):
@@ -1372,7 +1385,7 @@
# first add on feature information if we've got any
self._add_feature()
- import Record
+ from . import Record
self._cur_feature = Record.Feature()
self._cur_feature.key = content
@@ -1407,7 +1420,7 @@
/pseudo which would be passed in with the next key (since no other
tags separate them in the file)
"""
- import Record
+ from . import Record
for content in content_list:
# the record parser keeps the /s -- add them if we don't have 'em
if not content.startswith("/"):
@@ -1467,11 +1480,10 @@
"""Iterate over GenBank formatted entries as Record objects.
>>> from Bio import GenBank
- >>> handle = open("GenBank/NC_000932.gb")
- >>> for record in GenBank.parse(handle):
- ... print record.accession
+ >>> with open("GenBank/NC_000932.gb") as handle:
+ ... for record in GenBank.parse(handle):
+ ... print(record.accession)
['NC_000932']
- >>> handle.close()
To get SeqRecord objects use Bio.SeqIO.parse(..., format="gb")
instead.
@@ -1483,24 +1495,23 @@
"""Read a handle containing a single GenBank entry as a Record object.
>>> from Bio import GenBank
- >>> handle = open("GenBank/NC_000932.gb")
- >>> record = GenBank.read(handle)
- >>> print record.accession
+ >>> with open("GenBank/NC_000932.gb") as handle:
+ ... record = GenBank.read(handle)
+ ... print(record.accession)
['NC_000932']
- >>> handle.close()
To get a SeqRecord object use Bio.SeqIO.read(..., format="gb")
instead.
"""
iterator = parse(handle)
try:
- first = iterator.next()
+ first = next(iterator)
except StopIteration:
first = None
if first is None:
raise ValueError("No records found in handle")
try:
- second = iterator.next()
+ second = next(iterator)
except StopIteration:
second = None
if second is not None:
@@ -1512,22 +1523,22 @@
"""Run the Bio.GenBank module's doctests."""
import doctest
import os
- if os.path.isdir(os.path.join("..","..","Tests")):
- print "Running doctests..."
+ if os.path.isdir(os.path.join("..", "..", "Tests")):
+ print("Running doctests...")
cur_dir = os.path.abspath(os.curdir)
- os.chdir(os.path.join("..","..","Tests"))
+ os.chdir(os.path.join("..", "..", "Tests"))
doctest.testmod()
os.chdir(cur_dir)
del cur_dir
- print "Done"
+ print("Done")
elif os.path.isdir(os.path.join("Tests")):
- print "Running doctests..."
+ print("Running doctests...")
cur_dir = os.path.abspath(os.curdir)
os.chdir(os.path.join("Tests"))
doctest.testmod()
os.chdir(cur_dir)
del cur_dir
- print "Done"
+ print("Done")
if __name__ == "__main__":
_test()
diff -Nru python-biopython-1.62/Bio/GenBank/utils.py python-biopython-1.63/Bio/GenBank/utils.py
--- python-biopython-1.62/Bio/GenBank/utils.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/GenBank/utils.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Useful utilities for helping in parsing GenBank files.
"""
diff -Nru python-biopython-1.62/Bio/Geo/Record.py python-biopython-1.63/Bio/Geo/Record.py
--- python-biopython-1.62/Bio/Geo/Record.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Geo/Record.py 2013-12-05 14:10:43.000000000 +0000
@@ -4,8 +4,7 @@
# license. Please see the LICENSE file that should have been included
# as part of this package.
-"""
-Hold GEO data in a straightforward format.
+"""Hold GEO data in a straightforward format.
classes:
o Record - All of the information in an GEO record.
@@ -14,6 +13,8 @@
"""
+from __future__ import print_function
+
class Record(object):
"""Hold GEO information in a format similar to the original record.
@@ -37,55 +38,53 @@
def __str__( self ):
output = ''
- output = output + 'GEO Type: %s\n' % self.entity_type
- output = output + 'GEO Id: %s\n' % self.entity_id
- att_keys = self.entity_attributes.keys()
- att_keys.sort()
+ output += 'GEO Type: %s\n' % self.entity_type
+ output += 'GEO Id: %s\n' % self.entity_id
+ att_keys = sorted(self.entity_attributes)
for key in att_keys:
- contents = self.entity_attributes[ key ]
+ contents = self.entity_attributes[key]
if isinstance(contents, list):
for item in contents:
try:
- output = output + '%s: %s\n' % ( key, item[ :40 ] )
- output = output + out_block( item[ 40: ] )
+ output += '%s: %s\n' % ( key, item[:40])
+ output += out_block(item[40:])
except:
pass
elif isinstance(contents, str):
- output = output + '%s: %s\n' % ( key, contents[ :40 ] )
- output = output + out_block( contents[ 40: ] )
+ output += '%s: %s\n' % (key, contents[:40])
+ output += out_block(contents[40:])
else:
- print contents
- output = output + '%s: %s\n' % ( key, val[ :40 ] )
- output = output + out_block( val[ 40: ] )
- col_keys = self.col_defs.keys()
- col_keys.sort()
- output = output + 'Column Header Definitions\n'
+ print(contents)
+ output += '%s: %s\n' % (key, val[:40])
+ output += out_block(val[40:])
+ col_keys = sorted(self.col_defs.keys())
+ output += 'Column Header Definitions\n'
for key in col_keys:
- val = self.col_defs[ key ]
- output = output + ' %s: %s\n' % ( key, val[ :40 ] )
- output = output + out_block( val[ 40: ], ' ' )
+ val = self.col_defs[key]
+ output += ' %s: %s\n' % (key, val[:40])
+ output += out_block(val[40:], ' ')
#May have to display VERY large tables,
#so only show the first 20 lines of data
- MAX_ROWS = 20+1 # include header in count
+ MAX_ROWS = 20 + 1 # include header in count
for row in self.table_rows[0:MAX_ROWS]:
- output = output + '%s: ' % self.table_rows.index( row )
+ output += '%s: ' % self.table_rows.index(row)
for col in row:
- output = output + '%s\t' % col
- output = output + '\n'
+ output += '%s\t' % col
+ output += '\n'
if len(self.table_rows) > MAX_ROWS:
- output = output + '...\n'
+ output += '...\n'
row = self.table_rows[-1]
- output = output + '%s: ' % self.table_rows.index( row )
+ output += '%s: ' % self.table_rows.index(row)
for col in row:
- output = output + '%s\t' % col
- output = output + '\n'
+ output += '%s\t' % col
+ output += '\n'
return output
def out_block( text, prefix = '' ):
output = ''
- for j in range( 0, len( text ), 80 ):
- output = output + '%s%s\n' % ( prefix, text[ j: j + 80 ] )
- output = output + '\n'
+ for j in range(0, len(text), 80):
+ output += '%s%s\n' % (prefix, text[j:j+80])
+ output += '\n'
return output
diff -Nru python-biopython-1.62/Bio/Geo/__init__.py python-biopython-1.63/Bio/Geo/__init__.py
--- python-biopython-1.62/Bio/Geo/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Geo/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -9,7 +9,7 @@
http://www.ncbi.nlm.nih.gov/geo/
"""
-import Record
+from . import Record
def _read_key_value(line):
@@ -44,7 +44,7 @@
continue
key, value = _read_key_value(line)
if key in record.entity_attributes:
- if type(record.entity_attributes[key])==list:
+ if isinstance(record.entity_attributes[key], list):
record.entity_attributes[key].append(value)
else:
existing = record.entity_attributes[key]
diff -Nru python-biopython-1.62/Bio/Graphics/BasicChromosome.py python-biopython-1.63/Bio/Graphics/BasicChromosome.py
--- python-biopython-1.62/Bio/Graphics/BasicChromosome.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Graphics/BasicChromosome.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Draw representations of organism chromosomes with added information.
These classes are meant to model the drawing of pictures of chromosomes.
diff -Nru python-biopython-1.62/Bio/Graphics/ColorSpiral.py python-biopython-1.63/Bio/Graphics/ColorSpiral.py
--- python-biopython-1.62/Bio/Graphics/ColorSpiral.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Graphics/ColorSpiral.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Generate RGB colours suitable for distinguishing categorical data.
This module provides a class that implements a spiral 'path' through HSV
@@ -189,5 +194,5 @@
colors = cs.get_colors(len(l))
dict = {}
for item in l:
- dict[item] = colors.next()
+ dict[item] = next(colors)
return dict
diff -Nru python-biopython-1.62/Bio/Graphics/Comparative.py python-biopython-1.63/Bio/Graphics/Comparative.py
--- python-biopython-1.62/Bio/Graphics/Comparative.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Graphics/Comparative.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Plots to compare information between different sources.
This file contains high level plots which are designed to be used to
diff -Nru python-biopython-1.62/Bio/Graphics/DisplayRepresentation.py python-biopython-1.63/Bio/Graphics/DisplayRepresentation.py
--- python-biopython-1.62/Bio/Graphics/DisplayRepresentation.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Graphics/DisplayRepresentation.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Represent information for graphical display.
Classes in this module are designed to hold information in a way that
diff -Nru python-biopython-1.62/Bio/Graphics/Distribution.py python-biopython-1.63/Bio/Graphics/Distribution.py
--- python-biopython-1.62/Bio/Graphics/Distribution.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Graphics/Distribution.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Display information distributed across a Chromosome-like object.
These classes are meant to show the distribution of some kind of information
diff -Nru python-biopython-1.62/Bio/Graphics/GenomeDiagram/_AbstractDrawer.py python-biopython-1.63/Bio/Graphics/GenomeDiagram/_AbstractDrawer.py
--- python-biopython-1.62/Bio/Graphics/GenomeDiagram/_AbstractDrawer.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Graphics/GenomeDiagram/_AbstractDrawer.py 2013-12-05 14:10:43.000000000 +0000
@@ -40,6 +40,10 @@
"""
# ReportLab imports
+from __future__ import print_function
+
+from Bio._py3k import range
+
from reportlab.lib import pagesizes
from reportlab.lib import colors
from reportlab.graphics.shapes import *
@@ -190,7 +194,7 @@
strokecolor, color = _stroke_and_fill_colors(color, border)
xy_list = []
- for (x,y) in list_of_points:
+ for (x, y) in list_of_points:
xy_list.append(x)
xy_list.append(y)
@@ -300,7 +304,7 @@
newdata.append((start, graph_data[0][0]+(graph_data[1][0]-graph_data[0][0])/2.,
graph_data[0][1]))
# add middle set
- for index in xrange(1, len(graph_data)-1):
+ for index in range(1, len(graph_data)-1):
lastxval, lastyval = graph_data[index-1]
xval, yval = graph_data[index]
nextxval, nextyval = graph_data[index+1]
diff -Nru python-biopython-1.62/Bio/Graphics/GenomeDiagram/_CircularDrawer.py python-biopython-1.63/Bio/Graphics/GenomeDiagram/_CircularDrawer.py
--- python-biopython-1.62/Bio/Graphics/GenomeDiagram/_CircularDrawer.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Graphics/GenomeDiagram/_CircularDrawer.py 2013-12-05 14:10:43.000000000 +0000
@@ -12,16 +12,20 @@
"""CircularDrawer module for GenomeDiagram."""
# ReportLab imports
+from __future__ import print_function
+
from reportlab.graphics.shapes import *
from reportlab.lib import colors
from reportlab.pdfbase import _fontdata
from reportlab.graphics.shapes import ArcPath
+from Bio._py3k import range
+
# GenomeDiagram imports
-from _AbstractDrawer import AbstractDrawer, draw_polygon, intermediate_points
-from _AbstractDrawer import _stroke_and_fill_colors
-from _FeatureSet import FeatureSet
-from _GraphSet import GraphSet
+from ._AbstractDrawer import AbstractDrawer, draw_polygon, intermediate_points
+from ._AbstractDrawer import _stroke_and_fill_colors
+from ._FeatureSet import FeatureSet
+from ._GraphSet import GraphSet
from math import ceil, pi, cos, sin, asin
@@ -437,7 +441,7 @@
# Default to placing the label the bottom of the feature
# as drawn on the page, meaning feature end on left half
label_angle = endangle + 0.5 * pi # Make text radial
- sinval, cosval = endsin,endcos
+ sinval, cosval = endsin, endcos
else:
# Default to placing the label on the bottom of the feature,
# which means the feature end when on right hand half
@@ -492,8 +496,8 @@
if self.end < endB:
endB = self.end
- trackobjA = cross_link._trackA(self._parent.tracks.values())
- trackobjB = cross_link._trackB(self._parent.tracks.values())
+ trackobjA = cross_link._trackA(list(self._parent.tracks.values()))
+ trackobjB = cross_link._trackB(list(self._parent.tracks.values()))
assert trackobjA is not None
assert trackobjB is not None
if trackobjA == trackobjB:
@@ -572,7 +576,7 @@
# Get graph data
data_quartiles = graph.quartiles()
- minval, maxval = data_quartiles[0],data_quartiles[4]
+ minval, maxval = data_quartiles[0], data_quartiles[4]
btm, ctr, top = self.track_radii[self.current_track_level]
trackheight = 0.5*(top-btm)
datarange = maxval - minval
@@ -627,7 +631,7 @@
# Set the number of pixels per unit for the data
data_quartiles = graph.quartiles()
- minval, maxval = data_quartiles[0],data_quartiles[4]
+ minval, maxval = data_quartiles[0], data_quartiles[4]
btm, ctr, top = self.track_radii[self.current_track_level]
trackheight = 0.5*(top-btm)
datarange = maxval - minval
@@ -686,7 +690,7 @@
# Get graph data
data_quartiles = graph.quartiles()
- minval, maxval = data_quartiles[0],data_quartiles[4]
+ minval, maxval = data_quartiles[0], data_quartiles[4]
midval = (maxval + minval)/2. # mid is the value at the X-axis
btm, ctr, top = self.track_radii[self.current_track_level]
trackheight = (top-btm)
@@ -820,7 +824,7 @@
for set in track.get_sets():
if set.__class__ is GraphSet:
# Y-axis
- for n in xrange(7):
+ for n in range(7):
angle = n * 1.0471975511965976
if angle < startangle or endangle < angle:
continue
@@ -912,7 +916,7 @@
#if 0.5*pi < tickangle < 1.5*pi:
# y1 -= label_offset
labelgroup = Group(label)
- labelgroup.transform = (1,0,0,1, x1, y1)
+ labelgroup.transform = (1, 0, 0, 1, x1, y1)
else:
labelgroup = None
return tick, labelgroup
@@ -978,7 +982,7 @@
theta, costheta, sintheta = self.canvas_angle(pos)
if theta < startangle or endangle < theta:
continue
- x,y = self.xcenter+btm*sintheta, self.ycenter+btm*costheta # start text halfway up marker
+ x, y = self.xcenter+btm*sintheta, self.ycenter+btm*costheta # start text halfway up marker
labelgroup = Group(label)
labelangle = self.sweep*2*pi*(pos-self.start)/self.length - pi/2
if theta > pi:
@@ -1061,12 +1065,12 @@
# Calculate trig values for angle and coordinates
startcos, startsin = cos(startangle), sin(startangle)
endcos, endsin = cos(endangle), sin(endangle)
- x0,y0 = self.xcenter, self.ycenter # origin of the circle
- x1,y1 = (x0+inner_radius*startsin, y0+inner_radius*startcos)
- x2,y2 = (x0+inner_radius*endsin, y0+inner_radius*endcos)
- x3,y3 = (x0+outer_radius*endsin, y0+outer_radius*endcos)
- x4,y4 = (x0+outer_radius*startsin, y0+outer_radius*startcos)
- return draw_polygon([(x1,y1),(x2,y2),(x3,y3),(x4,y4)], color, border)
+ x0, y0 = self.xcenter, self.ycenter # origin of the circle
+ x1, y1 = (x0+inner_radius*startsin, y0+inner_radius*startcos)
+ x2, y2 = (x0+inner_radius*endsin, y0+inner_radius*endcos)
+ x3, y3 = (x0+outer_radius*endsin, y0+outer_radius*endcos)
+ x4, y4 = (x0+outer_radius*startsin, y0+outer_radius*startcos)
+ return draw_polygon([(x1, y1), (x2, y2), (x3, y3), (x4, y4)], color, border)
def _draw_arc_line(self, path, start_radius, end_radius, start_angle, end_angle,
move=False):
@@ -1145,11 +1149,11 @@
inner_endcos, inner_endsin = cos(inner_endangle), sin(inner_endangle)
outer_startcos, outer_startsin = cos(outer_startangle), sin(outer_startangle)
outer_endcos, outer_endsin = cos(outer_endangle), sin(outer_endangle)
- x1,y1 = (x0+inner_radius*inner_startsin, y0+inner_radius*inner_startcos)
- x2,y2 = (x0+inner_radius*inner_endsin, y0+inner_radius*inner_endcos)
- x3,y3 = (x0+outer_radius*outer_endsin, y0+outer_radius*outer_endcos)
- x4,y4 = (x0+outer_radius*outer_startsin, y0+outer_radius*outer_startcos)
- return draw_polygon([(x1,y1),(x2,y2),(x3,y3),(x4,y4)], color, border,
+ x1, y1 = (x0+inner_radius*inner_startsin, y0+inner_radius*inner_startcos)
+ x2, y2 = (x0+inner_radius*inner_endsin, y0+inner_radius*inner_endcos)
+ x3, y3 = (x0+outer_radius*outer_endsin, y0+outer_radius*outer_endcos)
+ x4, y4 = (x0+outer_radius*outer_startsin, y0+outer_radius*outer_startcos)
+ return draw_polygon([(x1, y1), (x2, y2), (x3, y3), (x4, y4)], color, border,
#default is mitre/miter which can stick out too much:
strokeLineJoin=1, # 1=round
)
@@ -1181,7 +1185,7 @@
shaft_inner_radius = inner_radius + corner_len
shaft_outer_radius = outer_radius - corner_len
- cornerangle_delta = max(0.0,min(abs(boxheight)*0.5*corner/middle_radius, abs(angle*0.5)))
+ cornerangle_delta = max(0.0, min(abs(boxheight)*0.5*corner/middle_radius, abs(angle*0.5)))
if angle < 0:
cornerangle_delta *= -1 # reverse it
@@ -1271,7 +1275,7 @@
shaft_height = boxheight*shaft_height_ratio
shaft_inner_radius = middle_radius - 0.5*shaft_height
shaft_outer_radius = middle_radius + 0.5*shaft_height
- headangle_delta = max(0.0,min(abs(boxheight)*head_length_ratio/middle_radius, abs(angle)))
+ headangle_delta = max(0.0, min(abs(boxheight)*head_length_ratio/middle_radius, abs(angle)))
if angle < 0:
headangle_delta *= -1 # reverse it
if orientation=="right":
@@ -1290,21 +1294,21 @@
startcos, startsin = cos(startangle), sin(startangle)
headcos, headsin = cos(headangle), sin(headangle)
endcos, endsin = cos(endangle), sin(endangle)
- x0,y0 = self.xcenter, self.ycenter # origin of the circle
+ x0, y0 = self.xcenter, self.ycenter # origin of the circle
if 0.5 >= abs(angle) and abs(headangle_delta) >= abs(angle):
#If the angle is small, and the arrow is all head,
#cheat and just use a triangle.
if orientation=="right":
- x1,y1 = (x0+inner_radius*startsin, y0+inner_radius*startcos)
- x2,y2 = (x0+outer_radius*startsin, y0+outer_radius*startcos)
- x3,y3 = (x0+middle_radius*endsin, y0+middle_radius*endcos)
+ x1, y1 = (x0+inner_radius*startsin, y0+inner_radius*startcos)
+ x2, y2 = (x0+outer_radius*startsin, y0+outer_radius*startcos)
+ x3, y3 = (x0+middle_radius*endsin, y0+middle_radius*endcos)
else:
- x1,y1 = (x0+inner_radius*endsin, y0+inner_radius*endcos)
- x2,y2 = (x0+outer_radius*endsin, y0+outer_radius*endcos)
- x3,y3 = (x0+middle_radius*startsin, y0+middle_radius*startcos)
+ x1, y1 = (x0+inner_radius*endsin, y0+inner_radius*endcos)
+ x2, y2 = (x0+outer_radius*endsin, y0+outer_radius*endcos)
+ x3, y3 = (x0+middle_radius*startsin, y0+middle_radius*startcos)
#return draw_polygon([(x1,y1),(x2,y2),(x3,y3)], color, border,
# stroke_line_join=1)
- return Polygon([x1,y1,x2,y2,x3,y3],
+ return Polygon([x1, y1, x2, y2, x3, y3],
strokeColor=border or color,
fillColor=color,
strokeLineJoin=1, # 1=round, not mitre!
@@ -1425,7 +1429,7 @@
startcos, startsin = cos(startangle), sin(startangle)
headcos, headsin = cos(headangle), sin(headangle)
endcos, endsin = cos(endangle), sin(endangle)
- x0,y0 = self.xcenter, self.ycenter # origin of the circle
+ x0, y0 = self.xcenter, self.ycenter # origin of the circle
p = ArcPath(strokeColor=strokecolor,
fillColor=color,
diff -Nru python-biopython-1.62/Bio/Graphics/GenomeDiagram/_Colors.py python-biopython-1.63/Bio/Graphics/GenomeDiagram/_Colors.py
--- python-biopython-1.62/Bio/Graphics/GenomeDiagram/_Colors.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Graphics/GenomeDiagram/_Colors.py 2013-12-05 14:10:43.000000000 +0000
@@ -21,6 +21,9 @@
"""
# ReportLab imports
+from __future__ import print_function
+from Bio._py3k import basestring
+
from reportlab.lib import colors
@@ -107,20 +110,20 @@
Reads information from a file containing color information and
stores it internally
"""
- lines = open(filename, 'r').readlines()
- for line in lines:
- data = line.strip().split('\t')
- try:
- label = int(data[0])
- red, green, blue = int(data[1]), int(data[2]), int(data[3])
- if len(data) > 4:
- comment = data[4]
- else:
- comment = ""
- self._colorscheme[label] = (self.int255_color((red, green, blue)),
- comment)
- except:
- raise ValueError("Expected INT \t INT \t INT \t INT \t string input")
+ with open(filename, 'r').readlines() as lines:
+ for line in lines:
+ data = line.strip().split('\t')
+ try:
+ label = int(data[0])
+ red, green, blue = int(data[1]), int(data[2]), int(data[3])
+ if len(data) > 4:
+ comment = data[4]
+ else:
+ comment = ""
+ self._colorscheme[label] = (self.int255_color((red, green, blue)),
+ comment)
+ except:
+ raise ValueError("Expected INT \t INT \t INT \t INT \t string input")
def get_artemis_colorscheme(self):
""" get_artemis_colorscheme(self)
@@ -145,7 +148,7 @@
value = int(value)
except ValueError:
if value.count('.'): # dot-delimited
- value = int(artemis_color.split('.',1)[0]) # Use only first integer
+ value = int(artemis_color.split('.', 1)[0]) # Use only first integer
else:
raise
if value in self._artemis_colorscheme:
@@ -209,12 +212,12 @@
# Test code
gdct = ColorTranslator()
- print gdct.float1_color((0.5, 0.5, 0.5))
- print gdct.int255_color((1, 75, 240))
- print gdct.artemis_color(7)
- print gdct.scheme_color(2)
-
- print gdct.translate((0.5, 0.5, 0.5))
- print gdct.translate((1, 75, 240))
- print gdct.translate(7)
- print gdct.translate(2)
+ print(gdct.float1_color((0.5, 0.5, 0.5)))
+ print(gdct.int255_color((1, 75, 240)))
+ print(gdct.artemis_color(7))
+ print(gdct.scheme_color(2))
+
+ print(gdct.translate((0.5, 0.5, 0.5)))
+ print(gdct.translate((1, 75, 240)))
+ print(gdct.translate(7))
+ print(gdct.translate(2))
diff -Nru python-biopython-1.62/Bio/Graphics/GenomeDiagram/_Diagram.py python-biopython-1.63/Bio/Graphics/GenomeDiagram/_Diagram.py
--- python-biopython-1.62/Bio/Graphics/GenomeDiagram/_Diagram.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Graphics/GenomeDiagram/_Diagram.py 2013-12-05 14:10:43.000000000 +0000
@@ -36,9 +36,9 @@
renderPM=None
# GenomeDiagram
-from _LinearDrawer import LinearDrawer
-from _CircularDrawer import CircularDrawer
-from _Track import Track
+from ._LinearDrawer import LinearDrawer
+from ._CircularDrawer import CircularDrawer
+from ._Track import Track
from Bio.Graphics import _write
@@ -315,7 +315,7 @@
#just uses a cStringIO or StringIO handle with the drawToFile method.
#In order to put all our complicated file format specific code in one
#place we'll just use a StringIO handle here:
- from StringIO import StringIO
+ from Bio._py3k import StringIO
handle = StringIO()
self.write(handle, output, dpi)
return handle.getvalue()
@@ -335,8 +335,7 @@
if track_level not in self.tracks: # No track at that level
self.tracks[track_level] = track # so just add it
else: # Already a track there, so shunt all higher tracks up one
- occupied_levels = self.get_levels() # Get list of occupied levels...
- occupied_levels.sort() # ...sort it...
+ occupied_levels = sorted(self.get_levels()) # Get list of occupied levels...
occupied_levels.reverse() # ...reverse it (highest first)
for val in occupied_levels:
# If track value >= that to be added
@@ -360,8 +359,7 @@
if track_level not in self.tracks: # No track at that level
self.tracks[track_level] = newtrack # so just add it
else: # Already a track there, so shunt all higher tracks up one
- occupied_levels = self.get_levels() # Get list of occupied levels...
- occupied_levels.sort() # ...sort it...
+ occupied_levels = sorted(self.get_levels()) # Get list of occupied levels...
occupied_levels.reverse() # ...reverse (highest first)...
for val in occupied_levels:
if val >= track_level: # Track value >= that to be added
@@ -384,7 +382,7 @@
Returns a list of the tracks contained in the diagram
"""
- return self.tracks.values()
+ return list(self.tracks.values())
def move_track(self, from_level, to_level):
""" move_track(self, from_level, to_level)
@@ -425,9 +423,7 @@
Return a sorted list of levels occupied by tracks in the diagram
"""
- levels = self.tracks.keys()
- levels.sort()
- return levels
+ return sorted(self.tracks)
def get_drawn_levels(self):
""" get_drawn_levels(self) -> [int, int, ...]
@@ -435,10 +431,7 @@
Return a sorted list of levels occupied by tracks that are not
explicitly hidden
"""
- drawn_levels = [key for key in self.tracks.keys() if
- not self.tracks[key].hide] # get list of shown levels
- drawn_levels.sort()
- return drawn_levels
+ return sorted(key for key in self.tracks if not self.tracks[key].hide)
def range(self):
""" range(self) -> (int, int)
diff -Nru python-biopython-1.62/Bio/Graphics/GenomeDiagram/_Feature.py python-biopython-1.63/Bio/Graphics/GenomeDiagram/_Feature.py
--- python-biopython-1.62/Bio/Graphics/GenomeDiagram/_Feature.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Graphics/GenomeDiagram/_Feature.py 2013-12-05 14:10:43.000000000 +0000
@@ -27,7 +27,7 @@
from reportlab.lib import colors
# GenomeDiagram imports
-from _Colors import ColorTranslator
+from ._Colors import ColorTranslator
class Feature(object):
diff -Nru python-biopython-1.62/Bio/Graphics/GenomeDiagram/_FeatureSet.py python-biopython-1.63/Bio/Graphics/GenomeDiagram/_FeatureSet.py
--- python-biopython-1.62/Bio/Graphics/GenomeDiagram/_FeatureSet.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Graphics/GenomeDiagram/_FeatureSet.py 2013-12-05 14:10:43.000000000 +0000
@@ -34,11 +34,13 @@
# IMPORTS
# ReportLab
+from __future__ import print_function
+
from reportlab.pdfbase import _fontdata
from reportlab.lib import colors
# GenomeDiagram
-from _Feature import Feature
+from ._Feature import Feature
# Builtins
import re
@@ -181,7 +183,7 @@
"""
# If no attribute or value specified, return all features
if attribute is None or value is None:
- return self.features.values()
+ return list(self.features.values())
# If no comparator is specified, return all features where the attribute
# value matches that passed
if comparator is None:
@@ -210,7 +212,7 @@
Return a list of all ids for the feature set
"""
- return self.features.keys()
+ return list(self.features.keys())
def range(self):
""" range(self)
diff -Nru python-biopython-1.62/Bio/Graphics/GenomeDiagram/_Graph.py python-biopython-1.63/Bio/Graphics/GenomeDiagram/_Graph.py
--- python-biopython-1.62/Bio/Graphics/GenomeDiagram/_Graph.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Graphics/GenomeDiagram/_Graph.py 2013-12-05 14:10:43.000000000 +0000
@@ -28,6 +28,8 @@
"""
# ReportLab imports
+from __future__ import print_function
+
from reportlab.lib import colors
from math import sqrt
@@ -146,7 +148,7 @@
Return data as a list of sorted (position, value) tuples
"""
data = []
- for xval in self.data.keys():
+ for xval in self.data:
yval = self.data[xval]
data.append((xval, yval))
data.sort()
@@ -168,8 +170,7 @@
Returns the (minimum, lowerQ, medianQ, upperQ, maximum) values as
a tuple
"""
- data = self.data.values()
- data.sort()
+ data = sorted(self.data.values())
datalen = len(data)
return(data[0], data[datalen//4], data[datalen//2],
data[3*datalen//4], data[-1])
@@ -180,8 +181,7 @@
Returns the range of the data, i.e. its start and end points on
the genome as a (start, end) tuple
"""
- positions = self.data.keys()
- positions.sort()
+ positions = sorted(self.data) # i.e. dict keys
# Return first and last positions in graph
#print len(self.data)
return (positions[0], positions[-1])
@@ -191,7 +191,7 @@
Returns the mean value for the data points
"""
- data = self.data.values()
+ data = list(self.data.values())
sum = 0.
for item in data:
sum += float(item)
@@ -202,7 +202,7 @@
Returns the sample standard deviation for the data
"""
- data = self.data.values()
+ data = list(self.data.values())
m = self.mean()
runtotal = 0.
for entry in data:
@@ -238,10 +238,8 @@
high = index.stop
if index.step is not None and index.step != 1:
raise ValueError
- positions = self.data.keys()
- positions.sort()
outlist = []
- for pos in positions:
+ for pos in sorted(self.data):
if pos >= low and pos <=high:
outlist.append((pos, self.data[pos]))
return outlist
diff -Nru python-biopython-1.62/Bio/Graphics/GenomeDiagram/_GraphSet.py python-biopython-1.63/Bio/Graphics/GenomeDiagram/_GraphSet.py
--- python-biopython-1.62/Bio/Graphics/GenomeDiagram/_GraphSet.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Graphics/GenomeDiagram/_GraphSet.py 2013-12-05 14:10:43.000000000 +0000
@@ -30,9 +30,11 @@
"""
# ReportLab imports
+from __future__ import print_function
+
from reportlab.lib import colors
-from _Graph import GraphData
+from ._Graph import GraphData
class GraphSet(object):
@@ -141,16 +143,14 @@
Return a list of all graphs in the graph set, sorted by id (for
reliable stacking...)
"""
- ids = self._graphs.keys()
- ids.sort()
- return [self._graphs[id] for id in ids]
+ return [self._graphs[id] for id in sorted(self._graphs)]
def get_ids(self):
""" get_ids(self) -> [int, int, ...]
Return a list of all ids for the graph set
"""
- return self._graphs.keys()
+ return list(self._graphs.keys())
def range(self):
""" range(self) -> (int, int)
@@ -172,7 +172,7 @@
"""
data = []
for graph in self._graphs.values():
- data += graph.data.values()
+ data += list(graph.data.values())
data.sort()
datalen = len(data)
return(data[0], data[datalen/4], data[datalen/2],
@@ -235,4 +235,4 @@
gdgs.add_graph(testdata1, 'TestData 1')
gdgs.add_graph(testdata2, 'TestData 2')
- print gdgs
+ print(gdgs)
diff -Nru python-biopython-1.62/Bio/Graphics/GenomeDiagram/_LinearDrawer.py python-biopython-1.63/Bio/Graphics/GenomeDiagram/_LinearDrawer.py
--- python-biopython-1.62/Bio/Graphics/GenomeDiagram/_LinearDrawer.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Graphics/GenomeDiagram/_LinearDrawer.py 2013-12-05 14:10:43.000000000 +0000
@@ -27,15 +27,17 @@
"""
# ReportLab imports
+from __future__ import print_function
+
from reportlab.graphics.shapes import *
from reportlab.lib import colors
# GenomeDiagram imports
-from _AbstractDrawer import AbstractDrawer, draw_box, draw_arrow
-from _AbstractDrawer import draw_cut_corner_box, _stroke_and_fill_colors
-from _AbstractDrawer import intermediate_points, angle2trig
-from _FeatureSet import FeatureSet
-from _GraphSet import GraphSet
+from ._AbstractDrawer import AbstractDrawer, draw_box, draw_arrow
+from ._AbstractDrawer import draw_cut_corner_box, _stroke_and_fill_colors
+from ._AbstractDrawer import intermediate_points, angle2trig
+from ._FeatureSet import FeatureSet
+from ._GraphSet import GraphSet
from math import ceil
@@ -635,7 +637,7 @@
else:
x2 = self.xlim
box = draw_box((x1, tbtm), (x2, ttop), # Grey track bg
- colors.Color(0.96,0.96, 0.96)) # is just a box
+ colors.Color(0.96, 0.96, 0.96)) # is just a box
greytrack_bgs.append(box)
if track.greytrack_labels: # If labels are required
@@ -724,7 +726,7 @@
# several parts, and one or more of those parts may end up being
# drawn on a non-existent fragment. So we check that the start and
# end fragments do actually exist in terms of the drawing
- allowed_fragments = self.fragment_limits.keys()
+ allowed_fragments = list(self.fragment_limits.keys())
if start_fragment in allowed_fragments and end_fragment in allowed_fragments:
#print feature.name, feature.start, feature.end, start_offset, end_offset
if start_fragment == end_fragment: # Feature is found on one fragment
@@ -783,8 +785,8 @@
if self.end < endB:
endB = self.end
- trackobjA = cross_link._trackA(self._parent.tracks.values())
- trackobjB = cross_link._trackB(self._parent.tracks.values())
+ trackobjA = cross_link._trackA(list(self._parent.tracks.values()))
+ trackobjB = cross_link._trackB(list(self._parent.tracks.values()))
assert trackobjA is not None
assert trackobjB is not None
if trackobjA == trackobjB:
@@ -818,7 +820,7 @@
strokecolor, fillcolor = _stroke_and_fill_colors(cross_link.color, cross_link.border)
- allowed_fragments = self.fragment_limits.keys()
+ allowed_fragments = list(self.fragment_limits.keys())
start_fragmentA, start_offsetA = self.canvas_location(startA)
end_fragmentA, end_offsetA = self.canvas_location(endA)
@@ -895,14 +897,14 @@
if fragment < start_fragmentB:
extra = [self.x0 + self.pagewidth, 0.5 * (yA + yB)]
else:
- extra = [self.x0 , 0.5 * (yA + yB)]
+ extra = [self.x0, 0.5 * (yA + yB)]
else:
if fragment < start_fragmentB:
extra = [self.x0 + self.pagewidth, 0.7*yA + 0.3*yB,
self.x0 + self.pagewidth, 0.3*yA + 0.7*yB]
else:
- extra = [self.x0 , 0.3*yA + 0.7*yB,
- self.x0 , 0.7*yA + 0.3*yB]
+ extra = [self.x0, 0.3*yA + 0.7*yB,
+ self.x0, 0.7*yA + 0.3*yB]
answer.append(Polygon([xAs, yA, xAe, yA] + extra,
strokeColor=strokecolor,
fillColor=fillcolor,
@@ -915,14 +917,14 @@
if fragment < start_fragmentA:
extra = [self.x0 + self.pagewidth, 0.5 * (yA + yB)]
else:
- extra = [self.x0 , 0.5 * (yA + yB)]
+ extra = [self.x0, 0.5 * (yA + yB)]
else:
if fragment < start_fragmentA:
extra = [self.x0 + self.pagewidth, 0.3*yA + 0.7*yB,
self.x0 + self.pagewidth, 0.7*yA + 0.3*yB]
else:
- extra = [self.x0 , 0.7*yA + 0.3*yB,
- self.x0 , 0.3*yA + 0.7*yB]
+ extra = [self.x0, 0.7*yA + 0.3*yB,
+ self.x0, 0.3*yA + 0.7*yB]
answer.append(Polygon([xBs, yB, xBe, yB] + extra,
strokeColor=strokecolor,
fillColor=fillcolor,
@@ -989,15 +991,15 @@
ctr += self.fragment_lines[fragment][0]
top += self.fragment_lines[fragment][0]
except: # Only called if the method screws up big time
- print "We've got a screw-up"
- print self.start, self.end
- print self.fragment_bases
- print x0, x1
+ print("We've got a screw-up")
+ print("%s %s" % (self.start, self.end))
+ print(self.fragment_bases)
+ print("%r %r" % (x0, x1))
for locstart, locend in feature.locations:
- print self.canvas_location(locstart)
- print self.canvas_location(locend)
- print 'FEATURE\n', feature
- 1/0
+ print(self.canvas_location(locstart))
+ print(self.canvas_location(locend))
+ print('FEATURE\n%s' % feature)
+ raise
# Distribution dictionary for various ways of drawing the feature
draw_methods = {'BOX': self._draw_sigil_box,
@@ -1094,7 +1096,7 @@
# Get graph data
data_quartiles = graph.quartiles()
- minval, maxval = data_quartiles[0],data_quartiles[4]
+ minval, maxval = data_quartiles[0], data_quartiles[4]
btm, ctr, top = self.track_offsets[self.current_track_level]
trackheight = 0.5*(top-btm)
datarange = maxval - minval
@@ -1164,7 +1166,7 @@
# Get graph data and information
data_quartiles = graph.quartiles()
- minval, maxval = data_quartiles[0],data_quartiles[4]
+ minval, maxval = data_quartiles[0], data_quartiles[4]
midval = (maxval + minval)/2. # mid is the value at the X-axis
btm, ctr, top = self.track_offsets[self.current_track_level]
trackheight = (top-btm)
@@ -1242,7 +1244,7 @@
# Set the number of pixels per unit for the data
data_quartiles = graph.quartiles()
- minval, maxval = data_quartiles[0],data_quartiles[4]
+ minval, maxval = data_quartiles[0], data_quartiles[4]
btm, ctr, top = self.track_offsets[self.current_track_level]
trackheight = 0.5*(top-btm)
datarange = maxval - minval
@@ -1346,7 +1348,7 @@
else:
y1 = bottom
y2 = top
- return draw_box((x1,y1), (x2,y2), **kwargs)
+ return draw_box((x1, y1), (x2, y2), **kwargs)
def _draw_sigil_octo(self, bottom, center, top, x1, x2, strand, **kwargs):
"""Draw OCTO sigil, a box with the corners cut off."""
@@ -1359,7 +1361,7 @@
else:
y1 = bottom
y2 = top
- return draw_cut_corner_box((x1,y1), (x2,y2), **kwargs)
+ return draw_cut_corner_box((x1, y1), (x2, y2), **kwargs)
def _draw_sigil_jaggy(self, bottom, center, top, x1, x2, strand,
color, border=None, **kwargs):
@@ -1421,7 +1423,7 @@
y1 = bottom
y2 = top
orientation = "right" # backward compatibility
- return draw_arrow((x1,y1), (x2,y2), orientation=orientation, **kwargs)
+ return draw_arrow((x1, y1), (x2, y2), orientation=orientation, **kwargs)
def _draw_sigil_big_arrow(self, bottom, center, top, x1, x2, strand, **kwargs):
"""Draw BIGARROW sigil, like ARROW but straddles the axis."""
@@ -1429,4 +1431,4 @@
orientation = "left"
else:
orientation = "right"
- return draw_arrow((x1,bottom), (x2,top), orientation=orientation, **kwargs)
+ return draw_arrow((x1, bottom), (x2, top), orientation=orientation, **kwargs)
diff -Nru python-biopython-1.62/Bio/Graphics/GenomeDiagram/_Track.py python-biopython-1.63/Bio/Graphics/GenomeDiagram/_Track.py
--- python-biopython-1.62/Bio/Graphics/GenomeDiagram/_Track.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Graphics/GenomeDiagram/_Track.py 2013-12-05 14:10:43.000000000 +0000
@@ -27,12 +27,15 @@
"""
-# ReportLab imports
+from __future__ import print_function
+
from reportlab.lib import colors
+from Bio._py3k import range
+
# GenomeDiagram imports
-from _FeatureSet import FeatureSet
-from _GraphSet import GraphSet
+from ._FeatureSet import FeatureSet
+from ._GraphSet import GraphSet
class Track(object):
@@ -88,7 +91,7 @@
labels on the grey track
o greytrack_font_rotation Int describing the angle through which to
- rotate the grey track labels
+ rotate the grey track labels (Linear only)
o greytrack_font_color colors.Color describing the color to draw
the grey track labels
@@ -166,7 +169,7 @@
labels on the grey track
o greytrack_font_rotation Int describing the angle through which to
- rotate the grey track labels
+ rotate the grey track labels (Linear only)
o greytrack_font_color colors.Color describing the color to draw
the grey track labels (overridden by
@@ -301,14 +304,14 @@
Return the sets contained in this track
"""
- return self._sets.values()
+ return list(self._sets.values())
def get_ids(self):
""" get_ids(self) -> [int, int, ...]
Return the ids of all sets contained in this track
"""
- return self._sets.keys()
+ return list(self._sets.keys())
def range(self):
""" range(self) -> (int, int)
@@ -378,8 +381,8 @@
# test code
from Bio import SeqIO
- from _FeatureSet import FeatureSet
- from _GraphSet import GraphSet
+ from ._FeatureSet import FeatureSet
+ from ._GraphSet import GraphSet
from random import normalvariate
genbank_entry = SeqIO.read('/data/genomes/Bacteria/Nanoarchaeum_equitans/NC_005213.gbk', 'gb')
@@ -397,15 +400,15 @@
gdt.add_set(gdfs2)
graphdata = []
- for pos in xrange(1, len(genbank_entry.seq), 1000):
+ for pos in range(1, len(genbank_entry.seq), 1000):
graphdata.append((pos, normalvariate(0.5, 0.1)))
gdgs = GraphSet(2, 'test data')
gdgs.add_graph(graphdata, 'Test Data')
gdt.add_set(gdgs)
- print gdt.get_ids()
+ print(gdt.get_ids())
sets = gdt.get_sets()
for set in sets:
- print set
+ print(set)
- print gdt.get_element_limits()
+ print(gdt.get_element_limits())
diff -Nru python-biopython-1.62/Bio/Graphics/GenomeDiagram/__init__.py python-biopython-1.63/Bio/Graphics/GenomeDiagram/__init__.py
--- python-biopython-1.62/Bio/Graphics/GenomeDiagram/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Graphics/GenomeDiagram/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -12,11 +12,11 @@
#Local imports, to make these classes available directly under the
#Bio.Graphics.GenomeDiagram namespace:
-from _Diagram import Diagram
-from _Track import Track
-from _FeatureSet import FeatureSet
-from _GraphSet import GraphSet
-from _CrossLink import CrossLink
+from ._Diagram import Diagram
+from ._Track import Track
+from ._FeatureSet import FeatureSet
+from ._GraphSet import GraphSet
+from ._CrossLink import CrossLink
#Not (currently) made public,
#from _Colors import ColorTranslator
diff -Nru python-biopython-1.62/Bio/Graphics/__init__.py python-biopython-1.63/Bio/Graphics/__init__.py
--- python-biopython-1.62/Bio/Graphics/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Graphics/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -61,7 +61,7 @@
#If output is not a string, then .upper() will trigger
#an attribute error...
drawmethod = formatdict[format.upper()] # select drawing method
- except (KeyError,AttributeError):
+ except (KeyError, AttributeError):
raise ValueError("Output format should be one of %s"
% ", ".join(formatdict))
diff -Nru python-biopython-1.62/Bio/HMM/DynamicProgramming.py python-biopython-1.63/Bio/HMM/DynamicProgramming.py
--- python-biopython-1.62/Bio/HMM/DynamicProgramming.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/HMM/DynamicProgramming.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,9 +1,16 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Dynamic Programming algorithms for general usage.
This module contains classes which implement Dynamic Programming
algorithms that can be used generally.
"""
+from Bio._py3k import range
+
class AbstractDPAlgorithms(object):
"""An abstract class to calculate forward and backward probabilities.
@@ -137,7 +144,7 @@
# -- recursion
# first loop over the training sequence backwards
# Recursion step: (i = L - 1 ... 1)
- all_indexes = range(len(self._seq.emissions) - 1)
+ all_indexes = list(range(len(self._seq.emissions) - 1))
all_indexes.reverse()
for i in all_indexes:
# now loop over the letters in the state path
diff -Nru python-biopython-1.62/Bio/HMM/MarkovModel.py python-biopython-1.63/Bio/HMM/MarkovModel.py
--- python-biopython-1.62/Bio/HMM/MarkovModel.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/HMM/MarkovModel.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Deal with representations of Markov Models.
"""
# standard modules
@@ -8,7 +13,8 @@
#TODO - Take advantage of defaultdict once Python 2.4 is dead?
#from collections import defaultdict
-# biopython
+from Bio._py3k import range
+
from Bio.Seq import MutableSeq
@@ -191,7 +197,7 @@
self.initial_prob = copy.copy(initial_prob)
# ensure that all referenced states are valid
- for state in initial_prob.iterkeys():
+ for state in initial_prob:
assert state in self._state_alphabet.letters, \
"State %s was not found in the sequence alphabet" % state
@@ -264,7 +270,7 @@
"allow_transition or allow_all_transitions first.")
transitions_from = _calculate_from_transitions(self.transition_prob)
- for from_state in transitions_from.keys():
+ for from_state in transitions_from:
freqs = _gen_random_array(len(transitions_from[from_state]))
for to_state in transitions_from[from_state]:
self.transition_prob[(from_state, to_state)] = freqs.pop()
@@ -282,7 +288,7 @@
"Allow some or all emissions.")
emissions = _calculate_emissions(self.emission_prob)
- for state in emissions.iterkeys():
+ for state in emissions:
freqs = _gen_random_array(len(emissions[state]))
for symbol in emissions[state]:
self.emission_prob[(state, symbol)] = freqs.pop()
@@ -617,7 +623,7 @@
# --- traceback
traceback_seq = MutableSeq('', state_alphabet)
- loop_seq = range(1, len(sequence))
+ loop_seq = list(range(1, len(sequence)))
loop_seq.reverse()
# last_state is the last state in the most probable state sequence.
diff -Nru python-biopython-1.62/Bio/HMM/Trainer.py python-biopython-1.63/Bio/HMM/Trainer.py
--- python-biopython-1.62/Bio/HMM/Trainer.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/HMM/Trainer.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Provide trainers which estimate parameters based on training sequences.
These should be used to 'train' a Markov Model prior to actually using
@@ -16,7 +21,7 @@
import math
# local stuff
-from DynamicProgramming import ScaledDPAlgorithms
+from .DynamicProgramming import ScaledDPAlgorithms
class TrainingSequence(object):
@@ -104,8 +109,7 @@
calculation.
"""
# get an ordered list of all items
- all_ordered = counts.keys()
- all_ordered.sort()
+ all_ordered = sorted(counts)
ml_estimation = {}
@@ -191,7 +195,7 @@
prev_log_likelihood = None
num_iterations = 1
- while 1:
+ while True:
transition_count = self._markov_model.get_blank_transitions()
emission_count = self._markov_model.get_blank_emissions()
diff -Nru python-biopython-1.62/Bio/HMM/Utilities.py python-biopython-1.63/Bio/HMM/Utilities.py
--- python-biopython-1.62/Bio/HMM/Utilities.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/HMM/Utilities.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Generic functions which are useful for working with HMMs.
This just collects general functions which you might like to use in
@@ -5,6 +10,8 @@
"""
+from __future__ import print_function
+
def pretty_print_prediction(emissions, real_state, predicted_state,
emission_title = "Emissions",
real_title = "Real State",
@@ -33,19 +40,19 @@
cur_position = 0
# while we still have more than seq_length characters to print
- while 1:
+ while True:
if (cur_position + seq_length) < len(emissions):
extension = seq_length
else:
extension = len(emissions) - cur_position
- print "%s%s" % (emission_title,
- emissions[cur_position:cur_position + seq_length])
- print "%s%s" % (real_title,
- real_state[cur_position:cur_position + seq_length])
- print "%s%s\n" % (predicted_title,
+ print("%s%s" % (emission_title,
+ emissions[cur_position:cur_position + seq_length]))
+ print("%s%s" % (real_title,
+ real_state[cur_position:cur_position + seq_length]))
+ print("%s%s\n" % (predicted_title,
predicted_state[cur_position:
- cur_position + seq_length])
+ cur_position + seq_length]))
if (len(emissions) < (cur_position + seq_length)):
break
diff -Nru python-biopython-1.62/Bio/HotRand.py python-biopython-1.63/Bio/HotRand.py
--- python-biopython-1.62/Bio/HotRand.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/HotRand.py 2013-12-05 14:10:43.000000000 +0000
@@ -8,19 +8,24 @@
support biosimulations that rely on random numbers.
"""
-import urllib
+from __future__ import print_function
+
+#Importing these functions with leading underscore as not intended for reuse
+from Bio._py3k import urlopen as _urlopen
+from Bio._py3k import urlencode as _urlencode
+
from Bio import BiopythonDeprecationWarning
import warnings
warnings.warn("The HotRand module is deprecated and likely to be removed in a future release of Biopython. Please use an alternative RNG.", BiopythonDeprecationWarning)
-def byte_concat( text ):
+def byte_concat(text):
val = 0
- numbytes = len( text )
- for i in range( 0, numbytes ):
+ numbytes = len(text)
+ for i in range(0, numbytes):
val = val * 256
- val = val + ord( text[ i ] )
-
+ # Slice trick for Python 2 and 3 to get single char (byte) string:
+ val += ord(text[i:i+1])
return val
@@ -29,46 +34,47 @@
def __init__( self ):
# self.url = 'http://www.fourmilab.ch/cgi-bin/uncgi/Hotbits?num=5000&min=1&max=6&col=1'
self.url = 'http://www.random.org/cgi-bin/randbyte?'
- self.query = { 'nbytes': 128, 'fmt': 'h' }
+ self.query = {'nbytes': 128, 'fmt': 'h'}
self.fill_hot_cache()
- def fill_hot_cache( self ):
- url = self.url + urllib.urlencode( self.query )
- fh = urllib.urlopen( url )
+ def fill_hot_cache(self):
+ url = self.url + _urlencode(self.query)
+ fh = _urlopen(url)
self.hot_cache = fh.read()
fh.close()
- def next_num( self, num_digits = 4 ):
+ def next_num(self, num_digits=4):
cache = self.hot_cache
- numbytes = num_digits / 2
- if( len( cache ) % numbytes != 0 ):
- print 'len_cache is %d' % len( cache )
+ # Must explicitly use integer division on python 3
+ numbytes = num_digits // 2
+ if len(cache) % numbytes != 0:
+ print('len_cache is %d' % len(cache))
raise ValueError
- if( cache == '' ):
+ if cache == '':
self.fill_hot_cache()
cache = self.hot_cache
- hexdigits = cache[ :numbytes ]
- self.hot_cache = cache[ numbytes: ]
- return byte_concat( hexdigits )
+ hexdigits = cache[:numbytes]
+ self.hot_cache = cache[numbytes:]
+ return byte_concat(hexdigits)
class HotRandom(object):
- def __init__( self ):
+ def __init__(self):
self.hot_cache = HotCache( )
- def hot_rand( self, high, low = 0 ):
+ def hot_rand(self, high, low=0):
span = high - low
val = self.hot_cache.next_num()
- val = ( span * val ) >> 16
- val = val + low
+ val = (span * val) >> 16
+ val += low
return val
-if( __name__ == '__main__' ):
+if __name__ == '__main__':
hot_random = HotRandom()
- for j in range( 0, 130 ):
- print hot_random.hot_rand( 25 )
- nums = [ '0000', 'abcd', '1234', '5555', '4321', 'aaaa', 'ffff' ]
+ for j in range(0, 130):
+ print(hot_random.hot_rand(25))
+ nums = ['0000', 'abcd', '1234', '5555', '4321', 'aaaa', 'ffff']
for num in nums:
- print hex_convert( num )
+ print(int(num, 16))
diff -Nru python-biopython-1.62/Bio/Index.py python-biopython-1.63/Bio/Index.py
--- python-biopython-1.62/Bio/Index.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Index.py 2013-12-05 14:10:43.000000000 +0000
@@ -16,9 +16,12 @@
"""
import os
import array
-import cPickle
import shelve
+try:
+ import cPickle as pickle # Only available under Python 2
+except ImportError:
+ import pickle # Python 3
class _ShelveIndex(dict):
"""An index file wrapped around shelve.
@@ -89,16 +92,16 @@
# Load the database if it exists
if os.path.exists(indexname):
- handle = open(indexname)
- version = self._toobj(handle.readline().rstrip())
- if version != self.__version:
- raise IOError("Version %s doesn't match my version %s"
- % (version, self.__version))
- for line in handle:
- key, value = line.split()
- key, value = self._toobj(key), self._toobj(value)
- self[key] = value
- self.__changed = 0
+ with open(indexname) as handle:
+ version = self._toobj(handle.readline().rstrip())
+ if version != self.__version:
+ raise IOError("Version %s doesn't match my version %s"
+ % (version, self.__version))
+ for line in handle:
+ key, value = line.split()
+ key, value = self._toobj(key), self._toobj(value)
+ self[key] = value
+ self.__changed = 0
def update(self, dict):
self.__changed = 1
@@ -118,12 +121,11 @@
def __del__(self):
if self.__changed:
- handle = open(self._indexname, 'w')
- handle.write("%s\n" % self._tostr(self.__version))
- for key, value in self.items():
- handle.write("%s %s\n" %
- (self._tostr(key), self._tostr(value)))
- handle.close()
+ with open(self._indexname, 'w') as handle:
+ handle.write("%s\n" % self._tostr(self.__version))
+ for key, value in self.items():
+ handle.write("%s %s\n" %
+ (self._tostr(key), self._tostr(value)))
def _tostr(self, obj):
# I need a representation of the object that's saveable to
@@ -133,15 +135,13 @@
# the integers into strings and join them together with commas.
# It's not the most efficient way of storing things, but it's
# relatively fast.
- s = cPickle.dumps(obj)
+ s = pickle.dumps(obj)
intlist = array.array('b', s)
- strlist = map(str, intlist)
- return ','.join(strlist)
+ return ','.join(str(i) for i in intlist)
def _toobj(self, str):
- intlist = map(int, str.split(','))
+ intlist = [int(i) for i in str.split(',')]
intlist = array.array('b', intlist)
- strlist = map(chr, intlist)
- return cPickle.loads(''.join(strlist))
+ return pickle.loads(''.join(chr(i) for i in intlist))
Index = _InMemoryIndex
diff -Nru python-biopython-1.62/Bio/KDTree/KDTree.py python-biopython-1.63/Bio/KDTree/KDTree.py
--- python-biopython-1.62/Bio/KDTree/KDTree.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/KDTree/KDTree.py 2013-12-05 14:10:43.000000000 +0000
@@ -3,8 +3,7 @@
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
-"""
-KD tree data structure for searching N-dimensional vectors.
+"""KD tree data structure for searching N-dimensional vectors.
The KD tree data structure can be used for all kinds of searches that
involve N-dimensional vectors, e.g. neighbor searches (find all points
@@ -14,6 +13,8 @@
Otfried Schwarzkopf). Author: Thomas Hamelryck.
"""
+from __future__ import print_function
+
from numpy import sum, sqrt, array
from numpy.random import random
@@ -54,9 +55,9 @@
else:
l2 = len(r)
if l1 == l2:
- print "Passed."
+ print("Passed.")
else:
- print "Not passed: %i != %i." % (l1, l2)
+ print("Not passed: %i != %i." % (l1, l2))
def _test(nr_points, dim, bucket_size, radius):
@@ -87,9 +88,9 @@
if _dist(p, center) <= radius:
l2 = l2 + 1
if l1 == l2:
- print "Passed."
+ print("Passed.")
else:
- print "Not passed: %i != %i." % (l1, l2)
+ print("Not passed: %i != %i." % (l1, l2))
class KDTree(object):
@@ -245,7 +246,7 @@
indices = kdtree.all_get_indices()
radii = kdtree.all_get_radii()
- print "Found %i point pairs within radius %f." % (len(indices), query_radius)
+ print("Found %i point pairs within radius %f." % (len(indices), query_radius))
# Do 10 individual queries
@@ -261,4 +262,4 @@
radii = kdtree.get_radii()
x, y, z = center
- print "Found %i points in radius %f around center (%.2f, %.2f, %.2f)." % (len(indices), query_radius, x, y, z)
+ print("Found %i points in radius %f around center (%.2f, %.2f, %.2f)." % (len(indices), query_radius, x, y, z))
diff -Nru python-biopython-1.62/Bio/KDTree/__init__.py python-biopython-1.63/Bio/KDTree/__init__.py
--- python-biopython-1.62/Bio/KDTree/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/KDTree/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""
The KD tree data structure can be used for all kinds of searches that
involve N-dimensional vectors. For example, neighbor searches (find all points
@@ -7,4 +12,4 @@
Otfried Schwarzkopf).
"""
-from KDTree import KDTree
+from .KDTree import KDTree
diff -Nru python-biopython-1.62/Bio/KEGG/Compound/__init__.py python-biopython-1.63/Bio/KEGG/Compound/__init__.py
--- python-biopython-1.62/Bio/KEGG/Compound/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/KEGG/Compound/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -4,8 +4,7 @@
# license. Please see the LICENSE file that should have been included
# as part of this package.
-"""
-This module provides code to work with the KEGG Ligand/Compound database.
+"""Code to work with the KEGG Ligand/Compound database.
Functions:
parse - Returns an iterator giving Record objects.
@@ -15,18 +14,20 @@
"""
# other Biopython stuff
+from __future__ import print_function
+
from Bio.KEGG import _write_kegg
from Bio.KEGG import _wrap_kegg
# Set up line wrapping rules (see Bio.KEGG._wrap_kegg)
name_wrap = [0, "",
- (" ","$",1,1),
- ("-","$",1,1)]
+ (" ", "$", 1, 1),
+ ("-", "$", 1, 1)]
id_wrap = lambda indent : [indent, "",
- (" ","",1,0)]
+ (" ", "", 1, 0)]
struct_wrap = lambda indent : [indent, "",
- (" ","",1,1)]
+ (" ", "", 1, 1)]
class Record(object):
@@ -132,10 +133,10 @@
example, using one of the example KEGG files in the Biopython
test suite,
- >>> handle = open("KEGG/compound.sample")
- >>> for record in parse(handle):
- ... print record.entry, record.name[0]
- ...
+ >>> with open("KEGG/compound.sample") as handle:
+ ... for record in parse(handle):
+ ... print("%s %s" % (record.entry, record.name[0]))
+ ...
C00023 Iron
C00017 Protein
C00099 beta-Alanine
@@ -144,7 +145,6 @@
C00348 Undecaprenyl phosphate
C00349 2-Methyl-3-oxopropanoate
C01386 NH2Mec
- >>> handle.close()
"""
record = Record()
@@ -174,7 +174,7 @@
record.enzyme.append(enzyme)
elif keyword=="PATHWAY ":
if data[:5]=='PATH:':
- path, map, name = data.split(None,2)
+ path, map, name = data.split(None, 2)
pathway = (path[:-1], map, name)
record.pathway.append(pathway)
else:
@@ -204,3 +204,4 @@
if __name__ == "__main__":
from Bio._utils import run_doctest
run_doctest()
+
diff -Nru python-biopython-1.62/Bio/KEGG/Enzyme/__init__.py python-biopython-1.63/Bio/KEGG/Enzyme/__init__.py
--- python-biopython-1.62/Bio/KEGG/Enzyme/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/KEGG/Enzyme/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -4,8 +4,7 @@
# license. Please see the LICENSE file that should have been included
# as part of this package.
-"""
-This module provides code to work with the KEGG Enzyme database.
+"""Code to work with the KEGG Enzyme database.
Functions:
parse - Returns an iterator giving Record objects.
@@ -14,23 +13,25 @@
Record -- Holds the information from a KEGG Enzyme record.
"""
+from __future__ import print_function
+
from Bio.KEGG import _write_kegg
from Bio.KEGG import _wrap_kegg
# Set up line wrapping rules (see Bio.KEGG._wrap_kegg)
rxn_wrap = [0, "",
- (" + ","",1,1),
- (" = ","",1,1),
- (" ","$",1,1),
- ("-","$",1,1)]
+ (" + ", "", 1, 1),
+ (" = ", "", 1, 1),
+ (" ", "$", 1, 1),
+ ("-", "$", 1, 1)]
name_wrap = [0, "",
- (" ","$",1,1),
- ("-","$",1,1)]
+ (" ", "$", 1, 1),
+ ("-", "$", 1, 1)]
id_wrap = lambda indent : [indent, "",
- (" ","",1,0)]
+ (" ", "", 1, 0)]
struct_wrap = lambda indent : [indent, "",
- (" ","",1,1)]
+ (" ", "", 1, 1)]
class Record(object):
@@ -202,10 +203,10 @@
example, using one of the example KEGG files in the Biopython
test suite,
- >>> handle = open("KEGG/enzyme.sample")
- >>> for record in parse(handle):
- ... print record.entry, record.name[0]
- ...
+ >>> with open("KEGG/enzyme.sample") as handle:
+ ... for record in parse(handle):
+ ... print("%s %s" % (record.entry, record.name[0]))
+ ...
1.1.1.1 Alcohol dehydrogenase
1.1.1.62 Estradiol 17beta-dehydrogenase
1.1.1.68 Transferred to EC 1.7.99.5
@@ -214,7 +215,6 @@
2.4.1.68 Glycoprotein 6-alpha-L-fucosyltransferase
3.1.1.6 Acetylesterase
2.7.2.1 Acetate kinase
- >>> handle.close()
"""
record = Record()
@@ -263,7 +263,7 @@
record.effector.append(data.strip(";"))
elif keyword=="GENES ":
if data[3:5]==': ':
- key, values = data.split(":",1)
+ key, values = data.split(":", 1)
values = [value.split("(")[0] for value in values.split()]
row = (key, values)
record.genes.append(row)
@@ -281,11 +281,11 @@
record.name.append(data.strip(";"))
elif keyword=="PATHWAY ":
if data[:5]=='PATH:':
- _, map_num, name = data.split(None,2)
+ _, map_num, name = data.split(None, 2)
pathway = ('PATH', map_num, name)
record.pathway.append(pathway)
else:
- ec_num, name = data.split(None,1)
+ ec_num, name = data.split(None, 1)
pathway = 'PATH', ec_num, name
record.pathway.append(pathway)
elif keyword=="PRODUCT ":
@@ -313,3 +313,4 @@
if __name__ == "__main__":
from Bio._utils import run_doctest
run_doctest()
+
diff -Nru python-biopython-1.62/Bio/KEGG/__init__.py python-biopython-1.63/Bio/KEGG/__init__.py
--- python-biopython-1.62/Bio/KEGG/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/KEGG/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -37,7 +37,7 @@
indent = " " * wrap_rule[0]
connect = wrap_rule[1]
rules = wrap_rule[2:]
- while 1:
+ while True:
if len(line) <= max_width:
wrapped_line = wrapped_line + line
s = s + wrapped_line
diff -Nru python-biopython-1.62/Bio/LogisticRegression.py python-biopython-1.63/Bio/LogisticRegression.py
--- python-biopython-1.62/Bio/LogisticRegression.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/LogisticRegression.py 2013-12-05 14:10:43.000000000 +0000
@@ -2,8 +2,7 @@
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
-"""
-This module provides code for doing logistic regressions.
+"""Code for doing logistic regressions.
Classes:
@@ -16,6 +15,8 @@
classify Classify an observation into a class.
"""
+from __future__ import print_function
+
import numpy
import numpy.linalg
@@ -99,8 +100,8 @@
Xtyp = numpy.dot(Xt, y-p) # Calculate the first derivative.
XtWX = numpy.dot(numpy.dot(Xt, W), X) # Calculate the second derivative.
#u, s, vt = singular_value_decomposition(XtWX)
- #print "U", u
- #print "S", s
+ #print("U %s" % u)
+ #print("S %s" % s)
delta = numpy.linalg.solve(XtWX, Xtyp)
if numpy.fabs(stepsize-1.0) > 0.001:
delta = delta * stepsize
@@ -109,7 +110,7 @@
raise RuntimeError("Didn't converge.")
lr = LogisticRegression()
- lr.beta = map(float, beta) # Convert back to regular array.
+ lr.beta = [float(x) for x in beta] # Convert back to regular array.
return lr
diff -Nru python-biopython-1.62/Bio/MarkovModel.py python-biopython-1.63/Bio/MarkovModel.py
--- python-biopython-1.62/Bio/MarkovModel.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/MarkovModel.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""
This is an implementation of a state-emitting MarkovModel. I am using
terminology similar to Manning and Schutze.
@@ -61,8 +66,8 @@
self.p_emission = p_emission
def __str__(self):
- import StringIO
- handle = StringIO.StringIO()
+ from Bio._py3k import StringIO
+ handle = StringIO()
save(self, handle)
handle.seek(0)
return handle.read()
@@ -100,14 +105,14 @@
line = _readline_and_check_start(handle, "TRANSITION:")
for i in range(len(states)):
line = _readline_and_check_start(handle, " %s:" % states[i])
- mm.p_transition[i,:] = map(float, line.split()[1:])
+ mm.p_transition[i,:] = [float(v) for v in line.split()[1:]]
# Load the emission.
mm.p_emission = numpy.zeros((N, M))
line = _readline_and_check_start(handle, "EMISSION:")
for i in range(len(states)):
line = _readline_and_check_start(handle, " %s:" % states[i])
- mm.p_emission[i,:] = map(float, line.split()[1:])
+ mm.p_emission[i,:] = [float(v) for v in line.split()[1:]]
return mm
@@ -123,12 +128,10 @@
w(" %s: %g\n" % (mm.states[i], mm.p_initial[i]))
w("TRANSITION:\n")
for i in range(len(mm.p_transition)):
- x = map(str, mm.p_transition[i])
- w(" %s: %s\n" % (mm.states[i], ' '.join(x)))
+ w(" %s: %s\n" % (mm.states[i], ' '.join(str(x) for x in mm.p_transition[i])))
w("EMISSION:\n")
for i in range(len(mm.p_emission)):
- x = map(str, mm.p_emission[i])
- w(" %s: %s\n" % (mm.states[i], ' '.join(x)))
+ w(" %s: %s\n" % (mm.states[i], ' '.join(str(x) for x in mm.p_emission[i])))
# XXX allow them to specify starting points
@@ -164,12 +167,12 @@
raise ValueError("pseudo_initial not shape len(states)")
if pseudo_transition is not None:
pseudo_transition = numpy.asarray(pseudo_transition)
- if pseudo_transition.shape != (N,N):
+ if pseudo_transition.shape != (N, N):
raise ValueError("pseudo_transition not shape " +
"len(states) X len(states)")
if pseudo_emission is not None:
pseudo_emission = numpy.asarray(pseudo_emission)
- if pseudo_emission.shape != (N,M):
+ if pseudo_emission.shape != (N, M):
raise ValueError("pseudo_emission not shape " +
"len(states) X len(alphabet)")
@@ -182,7 +185,7 @@
training_outputs.append([indexes[x] for x in outputs])
# Do some sanity checking on the outputs.
- lengths = map(len, training_outputs)
+ lengths = [len(x) for x in training_outputs]
if min(lengths) == 0:
raise ValueError("I got training data with outputs of length 0")
@@ -209,17 +212,18 @@
p_initial = _copy_and_check(p_initial, (N,))
if p_transition is None:
- p_transition = _random_norm((N,N))
+ p_transition = _random_norm((N, N))
else:
- p_transition = _copy_and_check(p_transition, (N,N))
+ p_transition = _copy_and_check(p_transition, (N, N))
if p_emission is None:
- p_emission = _random_norm((N,M))
+ p_emission = _random_norm((N, M))
else:
- p_emission = _copy_and_check(p_emission, (N,M))
+ p_emission = _copy_and_check(p_emission, (N, M))
# Do all the calculations in log space to avoid underflows.
- lp_initial, lp_transition, lp_emission = map(
- numpy.log, (p_initial, p_transition, p_emission))
+ lp_initial = numpy.log(p_initial)
+ lp_transition = numpy.log(p_transition)
+ lp_emission = numpy.log(p_emission)
if pseudo_initial is not None:
lpseudo_initial = numpy.log(pseudo_initial)
else:
@@ -255,7 +259,7 @@
% MAX_ITERATIONS)
# Return everything back in normal space.
- return map(numpy.exp, (lp_initial, lp_transition, lp_emission))
+ return [numpy.exp(x) for x in (lp_initial, lp_transition, lp_emission)]
def _baum_welch_one(N, M, outputs,
@@ -286,13 +290,13 @@
bmat[j][t+1]
lp_traverse[i][j] = lp
# Normalize the probability for this time step.
- lp_arc[:,:,t] = lp_traverse - _logsum(lp_traverse)
+ lp_arc[:,:, t] = lp_traverse - _logsum(lp_traverse)
# Sum of all the transitions out of state i at time t.
lp_arcout_t = numpy.zeros((N, T))
for t in range(T):
for i in range(N):
- lp_arcout_t[i][t] = _logsum(lp_arc[i,:,t])
+ lp_arcout_t[i][t] = _logsum(lp_arc[i,:, t])
# Sum of all the transitions out of state i.
lp_arcout = numpy.zeros(N)
@@ -300,7 +304,7 @@
lp_arcout[i] = _logsum(lp_arcout_t[i,:])
# UPDATE P_INITIAL.
- lp_initial = lp_arcout_t[:,0]
+ lp_initial = lp_arcout_t[:, 0]
if lpseudo_initial is not None:
lp_initial = _logvecadd(lp_initial, lpseudo_initial)
lp_initial = lp_initial - _logsum(lp_initial)
@@ -310,7 +314,7 @@
# transitions out of i.
for i in range(N):
for j in range(N):
- lp_transition[i][j] = _logsum(lp_arc[i,j,:]) - lp_arcout[i]
+ lp_transition[i][j] = _logsum(lp_arc[i, j,:]) - lp_arcout[i]
if lpseudo_transition is not None:
lp_transition[i] = _logvecadd(lp_transition[i], lpseudo_transition)
lp_transition[i] = lp_transition[i] - _logsum(lp_transition[i])
@@ -323,7 +327,7 @@
for t in range(T):
k = outputs[t]
for j in range(N):
- ksum[k] = logaddexp(ksum[k], lp_arc[i,j,t])
+ ksum[k] = logaddexp(ksum[k], lp_arc[i, j, t])
ksum = ksum - _logsum(ksum) # Normalize
if lpseudo_emission is not None:
ksum = _logvecadd(ksum, lpseudo_emission[i])
@@ -337,7 +341,7 @@
# the _forward algorithm and calculate from the clean one, but
# that may be more expensive than overshooting the training by one
# step.
- return _logsum(fmat[:,T])
+ return _logsum(fmat[:, T])
def _forward(N, T, lp_initial, lp_transition, lp_emission, outputs):
@@ -348,7 +352,7 @@
matrix = numpy.zeros((N, T+1))
# Initialize the first column to be the initial values.
- matrix[:,0] = lp_initial
+ matrix[:, 0] = lp_initial
for t in range(1, T+1):
k = outputs[t-1]
for j in range(N):
@@ -408,12 +412,12 @@
raise ValueError("pseudo_initial not shape len(states)")
if pseudo_transition is not None:
pseudo_transition = numpy.asarray(pseudo_transition)
- if pseudo_transition.shape != (N,N):
+ if pseudo_transition.shape != (N, N):
raise ValueError("pseudo_transition not shape " +
"len(states) X len(states)")
if pseudo_emission is not None:
pseudo_emission = numpy.asarray(pseudo_emission)
- if pseudo_emission.shape != (N,M):
+ if pseudo_emission.shape != (N, M):
raise ValueError("pseudo_emission not shape " +
"len(states) X len(alphabet)")
@@ -449,7 +453,7 @@
# p_transition is the probability that a state leads to the next
# one. C(i,j)/C(i) where i and j are states.
- p_transition = numpy.zeros((N,N))
+ p_transition = numpy.zeros((N, N))
if pseudo_transition:
p_transition = p_transition + pseudo_transition
for states in training_states:
@@ -461,10 +465,10 @@
# p_emission is the probability of an output given a state.
# C(s,o)|C(s) where o is an output and s is a state.
- p_emission = numpy.zeros((N,M))
+ p_emission = numpy.zeros((N, M))
if pseudo_emission:
p_emission = p_emission + pseudo_emission
- p_emission = numpy.ones((N,M))
+ p_emission = numpy.ones((N, M))
for outputs, states in zip(training_outputs, training_states):
for o, s in zip(outputs, states):
p_emission[s, o] += 1
@@ -485,10 +489,9 @@
# _viterbi does calculations in log space. Add a tiny bit to the
# matrices so that the logs will not break.
- x = mm.p_initial + VERY_SMALL_NUMBER
- y = mm.p_transition + VERY_SMALL_NUMBER
- z = mm.p_emission + VERY_SMALL_NUMBER
- lp_initial, lp_transition, lp_emission = map(numpy.log, (x, y, z))
+ lp_initial = numpy.log(mm.p_initial + VERY_SMALL_NUMBER)
+ lp_transition = numpy.log(mm.p_transition + VERY_SMALL_NUMBER)
+ lp_emission = numpy.log(mm.p_emission + VERY_SMALL_NUMBER)
# Change output into a list of indexes into the alphabet.
indexes = itemindex(mm.alphabet)
output = [indexes[x] for x in output]
@@ -514,16 +517,16 @@
# Store the best scores.
scores = numpy.zeros((N, T))
- scores[:,0] = lp_initial + lp_emission[:,output[0]]
+ scores[:, 0] = lp_initial + lp_emission[:, output[0]]
for t in range(1, T):
k = output[t]
for j in range(N):
# Find the most likely place it came from.
- i_scores = scores[:,t-1] + \
- lp_transition[:,j] + \
- lp_emission[j,k]
+ i_scores = scores[:, t-1] + \
+ lp_transition[:, j] + \
+ lp_emission[j, k]
indexes = _argmaxes(i_scores)
- scores[j,t] = i_scores[indexes[0]]
+ scores[j, t] = i_scores[indexes[0]]
backtrace[j][t] = indexes
# Do the backtrace. First, find a good place to start. Then,
@@ -533,7 +536,7 @@
# it by keeping our own stack.
in_process = [] # list of (t, states, score)
results = [] # return values. list of (states, score)
- indexes = _argmaxes(scores[:,T-1]) # pick the first place
+ indexes = _argmaxes(scores[:, T-1]) # pick the first place
for i in indexes:
in_process.append((T-1, [i], scores[i][T-1]))
while in_process:
diff -Nru python-biopython-1.62/Bio/MaxEntropy.py python-biopython-1.63/Bio/MaxEntropy.py
--- python-biopython-1.62/Bio/MaxEntropy.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/MaxEntropy.py 2013-12-05 14:10:43.000000000 +0000
@@ -3,15 +3,16 @@
# license. Please see the LICENSE file that should have been included
# as part of this package.
-"""
-Maximum Entropy code.
+"""Maximum Entropy code.
-Uses Improved Iterative Scaling:
-XXX ref
+Uses Improved Iterative Scaling.
+"""
+#TODO Define terminology
-# XXX need to define terminology
+from __future__ import print_function
+from functools import reduce
-"""
+from Bio._py3k import map
import numpy
@@ -123,7 +124,7 @@
expects = []
for feature in features:
sum = 0.0
- for (i, j), f in feature.iteritems():
+ for (i, j), f in feature.items():
sum += p_yx[i][j] * f
expects.append(sum/len(xs))
return expects
@@ -141,7 +142,7 @@
# Calculate log P(y, x).
assert len(features) == len(alphas)
for feature, alpha in zip(features, alphas):
- for (x, y), f in feature.iteritems():
+ for (x, y), f in feature.items():
prob_yx[x][y] += alpha * f
# Take an exponent to get P(y, x)
prob_yx = numpy.exp(prob_yx)
@@ -171,7 +172,7 @@
# f#(x, y) = SUM_i feature(x, y)
f_sharp = numpy.zeros((N, nclasses))
for feature in features:
- for (i, j), f in feature.iteritems():
+ for (i, j), f in feature.items():
f_sharp[i][j] += f
return f_sharp
@@ -184,7 +185,7 @@
iters = 0
while iters < max_newton_iterations: # iterate for Newton's method
f_newton = df_newton = 0.0 # evaluate the function and derivative
- for (i, j), f in feature.iteritems():
+ for (i, j), f in feature.items():
prod = prob_yx[i][j] * f * numpy.exp(delta * f_sharp[i][j])
f_newton += prod
df_newton += prod * f_sharp[i][j]
@@ -334,4 +335,4 @@
xe=train(xcar, ycar, user_functions)
for xv, yv in zip(xcar, ycar):
xc=classify(xe, xv)
- print 'Pred:', xv, 'gives', xc, 'y is', yv
+ print('Pred: %s gives %s y is %s' % (xv, xc, yv))
diff -Nru python-biopython-1.62/Bio/Medline/__init__.py python-biopython-1.63/Bio/Medline/__init__.py
--- python-biopython-1.62/Bio/Medline/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Medline/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -3,8 +3,7 @@
# license. Please see the LICENSE file that should have been included
# as part of this package.
-"""
-This module provides code to work with Medline.
+"""Code to work with Medline from the NCBI.
Classes:
Record A dictionary holding Medline data.
@@ -15,6 +14,8 @@
"""
+from __future__ import print_function
+
class Record(dict):
"""A dictionary holding information from a Medline record.
All data are stored under the mnemonic appearing in the Medline
@@ -105,12 +106,13 @@
Typical usage:
from Bio import Medline
- handle = open("mymedlinefile")
- records = Medline.parse(handle)
- for record in record:
- print record['TI']
+ with open("mymedlinefile") as handle:
+ records = Medline.parse(handle)
+ for record in record:
+ print(record['TI'])
"""
+ #TODO - Turn that into a working doctest
# These keys point to string values
textkeys = ("ID", "PMID", "SO", "RF", "NI", "JC", "TA", "IS", "CY", "TT",
"CA", "IP", "VI", "DP", "YR", "PG", "LID", "DA", "LR", "OWN",
@@ -135,7 +137,7 @@
record[key] = []
record[key].append(line[6:])
try:
- line = handle.next()
+ line = next(handle)
except StopIteration:
finished = True
else:
@@ -160,10 +162,11 @@
Typical usage:
from Bio import Medline
- handle = open("mymedlinefile")
- record = Medline.read(handle)
- print record['TI']
+ with open("mymedlinefile") as handle:
+ record = Medline.read(handle)
+ print(record['TI'])
"""
+ #TODO - Turn that into a working doctest
records = parse(handle)
- return records.next()
+ return next(records)
diff -Nru python-biopython-1.62/Bio/Motif/Applications/_AlignAce.py python-biopython-1.63/Bio/Motif/Applications/_AlignAce.py
--- python-biopython-1.62/Bio/Motif/Applications/_AlignAce.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Motif/Applications/_AlignAce.py 1970-01-01 00:00:00.000000000 +0000
@@ -1,144 +0,0 @@
-# Copyright 2003-2009 by Bartek Wilczynski. All rights reserved.
-# Revisions copyright 2009 by Peter Cock.
-# This code is part of the Biopython distribution and governed by its
-# license. Please see the LICENSE file that should have been included
-# as part of this package.
-"""This module provides code to work with the standalone version of AlignACE,
-for motif search in DNA sequences.
-
-AlignACE homepage:
-
-http://arep.med.harvard.edu/mrnadata/mrnasoft.html
-
-AlignACE Citations:
-
-Computational identification of cis-regulatory elements associated with
-groups of functionally related genes in Saccharomyces cerevisiae,
-Hughes, JD, Estep, PW, Tavazoie S, & GM Church, Journal of Molecular
-Biology 2000 Mar 10;296(5):1205-14.
-
-Finding DNA Regulatory Motifs within Unaligned Non-Coding Sequences
-Clustered by Whole-Genome mRNA Quantitation,
-Roth, FR, Hughes, JD, Estep, PE & GM Church, Nature Biotechnology
-1998 Oct;16(10):939-45.
-
-"""
-from Bio.Application import AbstractCommandline, _Option, _Argument
-
-import warnings
-from Bio import BiopythonDeprecationWarning
-
-
-class AlignAceCommandline(AbstractCommandline):
- """Create a commandline for the AlignAce program (DEPRECATED).
-
- Example:
-
- >>> from Bio.Motif.Applications import AlignAceCommandline
- >>> in_file = "sequences.fasta"
- >>> alignace_cline = AlignAceCommandline(infile=in_file, gcback=0.55)
- >>> print alignace_cline
- AlignACE -i sequences.fasta -gcback 0.55
-
- You would typically run the command line with alignace_cline() or via
- the Python subprocess module, as described in the Biopython tutorial.
- """
- def __init__(self, cmd="AlignACE", **kwargs):
- warnings.warn("""The AlignACE application wrapper is deprecated and
- is likely to be removed in a future release of Biopython,
- since an up to date version of the AlignACE software
- cannot be obtained anymore. If you have a copy of
- AlignACE 4, please consider contacting the Biopython
- developers.""", BiopythonDeprecationWarning)
- self.parameters = \
- [
- _Option(["-i", "infile"],
- "Input Sequence file in FASTA format.",
- checker_function=lambda x: isinstance(x, str),
- equate=False,
- filename=True),
-
- _Option(["-numcols", "numcols"],
- "Number of columns to align",
- equate=False,
- checker_function=lambda x: isinstance(x, int)),
-
- _Option(["-expect", "expect"],
- "number of sites expected in model",
- equate=False,
- checker_function=lambda x: isinstance(x, int)),
-
- _Option(["-gcback", "gcback"],
- "background fractional GC content of input sequence",
- equate=False,
- checker_function=lambda x: isinstance(x, float)),
-
- _Option(["-minpass", "minpass"],
- "minimum number of non-improved passes in phase 1",
- equate=False,
- checker_function=lambda x: isinstance(x, int)),
-
- _Option(["-seed", "seed"],
- "set seed for random number generator (time)",
- equate=False,
- checker_function=lambda x: isinstance(x, int)),
-
- _Option(["-undersample", "undersample"],
- "possible sites / (expect * numcols * seedings)",
- equate=False,
- checker_function=lambda x: isinstance(x, int)),
-
- _Option(["-oversample", "oversample"],
- "1/undersample",
- equate=False,
- checker_function=lambda x: isinstance(x, int)),
- ]
- AbstractCommandline.__init__(self, cmd, **kwargs)
-
-
-class CompareAceCommandline(AbstractCommandline):
- """Create a commandline for the CompareAce program (DEPRECATED).
-
- Example:
-
- >>> from Bio.Motif.Applications import CompareAceCommandline
- >>> m1_file = "sequences1.fasta"
- >>> m2_file = "sequences2.fasta"
- >>> compareace_cline = CompareAceCommandline(motif1=m1_file, motif2=m2_file)
- >>> print compareace_cline
- CompareACE sequences1.fasta sequences2.fasta
-
- You would typically run the command line with compareace_cline() or via
- the Python subprocess module, as described in the Biopython tutorial.
- """
- def __init__(self, cmd="CompareACE", **kwargs):
- warnings.warn("""The CompareACE application wrapper is deprecated and
- is likely to be removed in a future release of Biopython,
- since an up to date version of the AlignACE software
- cannot be obtained anymore. If you have a copy of
- AlignACE 4, please consider contacting the Biopython
- developers.""", BiopythonDeprecationWarning)
- self.parameters = \
- [
- _Argument(["motif1"],
- "name of file containing motif 1",
- checker_function=lambda x: isinstance(x, str),
- filename=True),
- _Argument(["motif2"],
- "name of file containing motif 2",
- checker_function=lambda x: isinstance(x, str),
- filename=True),
- ]
- AbstractCommandline.__init__(self, cmd, **kwargs)
-
-
-def _test():
- """Run the module's doctests (PRIVATE)."""
- print "Running AlignAce doctests..."
- import doctest
- doctest.testmod()
- print "Done"
-
-
-if __name__ == "__main__":
- _test()
diff -Nru python-biopython-1.62/Bio/Motif/Applications/_XXmotif.py python-biopython-1.63/Bio/Motif/Applications/_XXmotif.py
--- python-biopython-1.62/Bio/Motif/Applications/_XXmotif.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Motif/Applications/_XXmotif.py 1970-01-01 00:00:00.000000000 +0000
@@ -1,182 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright 2012 by Christian Brueffer. All rights reserved.
-#
-# This code is part of the Biopython distribution and governed by its
-# license. Please see the LICENSE file that should have been included
-# as part of this package.
-"""Command line wrapper for the motif finding program XXmotif."""
-
-import os
-from Bio.Application import AbstractCommandline, _Option, _Switch, _Argument
-
-
-class XXmotifCommandline(AbstractCommandline):
- """Command line wrapper for XXmotif.
-
- http://xxmotif.genzentrum.lmu.de/
-
- Example:
-
- >>> from Bio.Motif.Applications import XXmotifCommandline
- >>> out_dir = "results"
- >>> in_file = "sequences.fasta"
- >>> xxmotif_cline = XXmotifCommandline(outdir=out_dir, seqfile=in_file, revcomp=True)
- >>> print xxmotif_cline
- XXmotif results sequences.fasta --revcomp
-
- You would typically run the command line with xxmotif_cline() or via
- the Python subprocess module, as described in the Biopython tutorial.
-
- Citations:
-
- Luehr S, Hartmann H, and Söding J. The XXmotif web server for eXhaustive,
- weight matriX-based motif discovery in nucleotide sequences,
- Nucleic Acids Res. 40: W104-W109 (2012).
-
- Hartmann H, Guthoehrlein EW, Siebert M., Luehr S, and Söding J. P-value
- based regulatory motif discovery using positional weight matrices
- (to be published)
-
- Last checked against version: 1.3
- """
-
- def __init__(self, cmd="XXmotif", **kwargs):
- # order of parameters is the same as in XXmotif --help
- _valid_alphabet = set("ACGTNX")
-
- self.parameters = \
- [
- _Argument(["outdir", "OUTDIR"],
- "output directory for all results",
- filename = True,
- is_required = True,
- # XXmotif currently does not accept spaces in the outdir name
- checker_function = lambda x: " " not in x),
- _Argument(["seqfile", "SEQFILE"],
- "file name with sequences from positive set in FASTA format",
- filename = True,
- is_required = True,
- # XXmotif currently only accepts a pure filename
- checker_function = lambda x: os.path.split(x)[0] == ""),
-
- # Options
- _Option(["--negSet", "negSet", "negset", "NEGSET"],
- "sequence set which has to be used as a reference set",
- filename = True,
- equate = False),
- _Switch(["--zoops", "zoops", "ZOOPS"],
- "use zero-or-one occurrence per sequence model (DEFAULT)"),
- _Switch(["--mops", "mops", "MOPS"],
- "use multiple occurrence per sequence model"),
- _Switch(["--oops", "oops", "OOPS"],
- "use one occurrence per sequence model"),
- _Switch(["--revcomp", "revcomp", "REVCOMP"],
- "search in reverse complement of sequences as well (DEFAULT: NO)"),
- _Option(["--background-model-order", "background-model-order", "BACKGROUND-MODEL-ORDER"],
- "order of background distribution (DEFAULT: 2, 8(--negset) )",
- checker_function = lambda x: isinstance(x, int),
- equate = False),
- _Option(["--pseudo", "pseudo", "PSEUDO"],
- "percentage of pseudocounts used (DEFAULT: 10)",
- checker_function = lambda x: isinstance(x, int),
- equate = False),
- _Option(["-g", "--gaps", "gaps", "GAPS"],
- "maximum number of gaps used for start seeds [0-3] (DEFAULT: 0)",
- checker_function = lambda x: x in [0-3],
- equate = False),
- _Option(["--type", "type", "TYPE"],
- "defines what kind of start seeds are used (DEFAULT: ALL)"
- "possible types: ALL, FIVEMERS, PALINDROME, TANDEM, NOPALINDROME, NOTANDEM",
- checker_function = lambda x: x in ["ALL", "all",
- "FIVEMERS", "fivemers",
- "PALINDROME", "palindrome",
- "TANDEM", "tandem",
- "NOPALINDROME", "nopalindrome",
- "NOTANDEM", "notandem"],
- equate = False),
- _Option(["--merge-motif-threshold", "merge-motif-threshold", "MERGE-MOTIF-THRESHOLD"],
- "defines the similarity threshold for merging motifs (DEFAULT: HIGH)"
- "possible modes: LOW, MEDIUM, HIGH",
- checker_function = lambda x: x in ["LOW", "low",
- "MEDIUM", "medium",
- "HIGH", "high"],
- equate = False),
- _Switch(["--no-pwm-length-optimization", "no-pwm-length-optimization", "NO-PWM-LENGTH-OPTIMIZATION"],
- "do not optimize length during iterations (runtime advantages)"),
- _Option(["--max-match-positions", "max-match-positions", "MAX-MATCH-POSITIONS"],
- "max number of positions per motif (DEFAULT: 17, higher values will lead to very long runtimes)",
- checker_function = lambda x: isinstance(x, int),
- equate = False),
- _Switch(["--batch", "batch", "BATCH"],
- "suppress progress bars (reduce output size for batch jobs)"),
- _Option(["--maxPosSetSize", "maxPosSetSize", "maxpossetsize", "MAXPOSSETSIZE"],
- "maximum number of sequences from the positive set used [DEFAULT: all]",
- checker_function = lambda x: isinstance(x, int),
- equate = False),
- # does not make sense in biopython
- #_Switch(["--help", "help", "HELP"],
- # "print this help page"),
- _Option(["--trackedMotif", "trackedMotif", "trackedmotif", "TRACKEDMOTIF"],
- "inspect extensions and refinement of a given seed (DEFAULT: not used)",
- checker_function = lambda x: any((c in _valid_alphabet) for c in x),
- equate = False),
-
- # Using conservation information
- _Option(["--format", "format", "FORMAT"],
- "defines what kind of format the input sequences have (DEFAULT: FASTA)",
- checker_function = lambda x: x in ["FASTA", "fasta",
- "MFASTA", "mfasta"],
- equate = False),
- _Option(["--maxMultipleSequences", "maxMultipleSequences", "maxmultiplesequences", "MAXMULTIPLESEQUENCES"],
- "maximum number of sequences used in an alignment [DEFAULT: all]",
- checker_function = lambda x: isinstance(x, int),
- equate = False),
-
- # Using localization information
- _Switch(["--localization", "localization", "LOCALIZATION"],
- "use localization information to calculate combined P-values"
- "(sequences should have all the same length)"),
- _Option(["--downstream", "downstream", "DOWNSTREAM"],
- "number of residues in positive set downstream of anchor point (DEFAULT: 0)",
- checker_function = lambda x: isinstance(x, int),
- equate = False),
-
- # Start with self defined motif
- _Option(["-m", "--startMotif", "startMotif", "startmotif", "STARTMOTIF"],
- "Start motif (IUPAC characters)",
- checker_function = lambda x: any((c in _valid_alphabet) for c in x),
- equate = False),
- _Option(["-p", "--profileFile", "profileFile", "profilefile", "PROFILEFILE"],
- "profile file",
- filename = True,
- equate = False),
- _Option(["--startRegion", "startRegion", "startregion", "STARTREGION"],
- "expected start position for motif occurrences relative to anchor point (--localization)",
- checker_function = lambda x: isinstance(x, int),
- equate = False),
- _Option(["--endRegion", "endRegion", "endregion", "ENDREGION"],
- "expected end position for motif occurrences relative to anchor point (--localization)",
- checker_function = lambda x: isinstance(x, int),
- equate = False),
-
- # XXmotif wrapper options
- _Switch(["--XXmasker", "masker"],
- "mask the input sequences for homology, repeats and low complexity regions"),
- _Switch(["--XXmasker-pos", "maskerpos"],
- "mask only the positive set for homology, repeats and low complexity regions"),
- _Switch(["--no-graphics", "nographics"],
- "run XXmotif without graphical output"),
- ]
- AbstractCommandline.__init__(self, cmd, **kwargs)
-
-
-def _test():
- """Run the module's doctests (PRIVATE)."""
- print "Running XXmotif doctests..."
- import doctest
- doctest.testmod()
- print "Done"
-
-
-if __name__ == "__main__":
- _test()
diff -Nru python-biopython-1.62/Bio/Motif/Applications/__init__.py python-biopython-1.63/Bio/Motif/Applications/__init__.py
--- python-biopython-1.62/Bio/Motif/Applications/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Motif/Applications/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,9 +1,9 @@
# Copyright 2009 by Bartek Wilczynski. All rights reserved.
-# Revisions copyright 2009 by Peter Cock.
+# Revisions copyright 2009-2013 by Peter Cock.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
-"""Motif command line tool wrappers."""
-from _AlignAce import AlignAceCommandline
-from _AlignAce import CompareAceCommandline
-from _XXmotif import XXmotifCommandline
+"""Motif command line tool wrappers (DEPRECATED, see Bio.motifs instead)."""
+from Bio.motifs.applications import AlignAceCommandline
+from Bio.motifs.applications import CompareAceCommandline
+from Bio.motifs.applications import XXmotifCommandline
diff -Nru python-biopython-1.62/Bio/Motif/Parsers/AlignAce.py python-biopython-1.63/Bio/Motif/Parsers/AlignAce.py
--- python-biopython-1.62/Bio/Motif/Parsers/AlignAce.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Motif/Parsers/AlignAce.py 2013-12-05 14:10:43.000000000 +0000
@@ -21,8 +21,8 @@
def read(handle):
"""read(handle)"""
record = Record()
- record.ver = handle.next()
- record.cmd_line = handle.next()
+ record.ver = next(handle)
+ record.cmd_line = next(handle)
for line in handle:
if line.strip() == "":
pass
@@ -44,7 +44,7 @@
elif line[:3]=="MAP":
record.current_motif.score = float(line.split()[-1])
elif len(line.split("\t"))==4:
- seq = Seq(line.split("\t")[0],IUPAC.unambiguous_dna)
+ seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
record.current_motif.add_instance(seq)
elif "*" in line:
record.current_motif.set_mask(line.strip("\n\c"))
diff -Nru python-biopython-1.62/Bio/Motif/Parsers/MAST.py python-biopython-1.63/Bio/Motif/Parsers/MAST.py
--- python-biopython-1.62/Bio/Motif/Parsers/MAST.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Motif/Parsers/MAST.py 2013-12-05 14:10:43.000000000 +0000
@@ -59,10 +59,10 @@
for line in handle:
if line.startswith('DATABASE AND MOTIFS'):
break
- line = handle.next()
+ line = next(handle)
if not line.startswith('****'):
raise ValueError("Line does not start with '****':\n%s" % line)
- line = handle.next()
+ line = next(handle)
if not 'DATABASE' in line:
raise ValueError("Line does not contain 'DATABASE':\n%s" % line)
words = line.strip().split()
@@ -74,7 +74,7 @@
for line in handle:
if 'MOTIF WIDTH' in line:
break
- line = handle.next()
+ line = next(handle)
if not '----' in line:
raise ValueError("Line does not contain '----':\n%s" % line)
for line in handle:
@@ -96,7 +96,7 @@
for line in handle:
if line.startswith('SEQUENCE NAME'):
break
- line = handle.next()
+ line = next(handle)
if not line.startswith('---'):
raise ValueError("Line does not start with '---':\n%s" % line)
for line in handle:
@@ -105,7 +105,7 @@
else:
sequence, description_evalue_length = line.split(None, 1)
record.sequences.append(sequence)
- line = handle.next()
+ line = next(handle)
if not line.startswith('****'):
raise ValueError("Line does not start with '****':\n%s" % line)
@@ -117,7 +117,7 @@
for line in handle:
if line.startswith('SEQUENCE NAME'):
break
- line = handle.next()
+ line = next(handle)
if not line.startswith('---'):
raise ValueError("Line does not start with '---':\n%s" % line)
for line in handle:
@@ -129,7 +129,7 @@
else:
sequence, pvalue, diagram = line.split()
record.diagrams[sequence] = diagram
- line = handle.next()
+ line = next(handle)
if not line.startswith('****'):
raise ValueError("Line does not start with '****':\n%s" % line)
diff -Nru python-biopython-1.62/Bio/Motif/Parsers/MEME.py python-biopython-1.63/Bio/Motif/Parsers/MEME.py
--- python-biopython-1.62/Bio/Motif/Parsers/MEME.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Motif/Parsers/MEME.py 2013-12-05 14:10:43.000000000 +0000
@@ -4,6 +4,8 @@
# license. Please see the LICENSE file that should have been included
# as part of this package.
+from __future__ import print_function
+
from Bio.Alphabet import IUPAC
from Bio import Seq
import re
@@ -18,12 +20,12 @@
Example:
- >>> f = open("meme.output.txt")
>>> from Bio.Motif.Parsers import MEME
- >>> record = MEME.read(f)
+ >>> with open("meme.output.txt") as f:
+ ... record = MEME.read(f)
>>> for motif in record.motifs:
... for instance in motif.instances:
- ... print instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue
+ ... print(instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue)
"""
record = MEMERecord()
@@ -45,7 +47,7 @@
__read_motif_sequences(motif, handle, 'revcomp' in record.command)
__skip_unused_lines(handle)
try:
- line = handle.next()
+ line = next(handle)
except StopIteration:
raise ValueError('Unexpected end of stream: Expected to find new motif, or the summary of motifs')
if line.startswith("SUMMARY OF MOTIFS"):
@@ -72,20 +74,20 @@
self.evalue = 0.0
def _numoccurrences (self, number):
- if type(number) == int:
+ if isinstance(number, int):
self.num_occurrences = number
else:
number = int(number)
self.num_occurrences = number
- def get_instance_by_name (self,name):
+ def get_instance_by_name (self, name):
for i in self.instances:
if i.sequence_name == name:
return i
return None
def add_instance_from_values (self, name = 'default', pvalue = 1, sequence = 'ATA', start = 0, strand = '+'):
- inst = MEMEInstance(sequence,self.alphabet)
+ inst = MEMEInstance(sequence, self.alphabet)
inst._pvalue(pvalue)
inst._seqname(name)
inst._start(start)
@@ -99,7 +101,7 @@
self.add_instance(inst)
def _evalue (self, evalue):
- if type(evalue) == float:
+ if isinstance(evalue, float):
self.evalue = evalue
else:
evalue = float(evalue)
@@ -125,11 +127,11 @@
def _motifname (self, name):
self.motif_name = name
- def _start (self,start):
+ def _start (self, start):
start = int(start)
self.start = start
- def _pvalue (self,pval):
+ def _pvalue (self, pval):
pval = float(pval)
self.pvalue = pval
@@ -187,31 +189,31 @@
else:
raise ValueError("Unexpected end of stream: 'TRAINING SET' not found.")
try:
- line = handle.next()
+ line = next(handle)
except StopIteration:
raise ValueError("Unexpected end of stream: Expected to find line starting with '****'")
if not line.startswith('****'):
raise ValueError("Line does not start with '****':\n%s" % line)
try:
- line = handle.next()
+ line = next(handle)
except StopIteration:
raise ValueError("Unexpected end of stream: Expected to find line starting with 'DATAFILE'")
if not line.startswith('DATAFILE'):
raise ValueError("Line does not start with 'DATAFILE':\n%s" % line)
line = line.strip()
- line = line.replace('DATAFILE= ','')
+ line = line.replace('DATAFILE= ', '')
record.datafile = line
def __read_alphabet(record, handle):
try:
- line = handle.next()
+ line = next(handle)
except StopIteration:
raise ValueError("Unexpected end of stream: Expected to find line starting with 'ALPHABET'")
if not line.startswith('ALPHABET'):
raise ValueError("Line does not start with 'ALPHABET':\n%s" % line)
line = line.strip()
- line = line.replace('ALPHABET= ','')
+ line = line.replace('ALPHABET= ', '')
if line == 'ACGT':
al = IUPAC.unambiguous_dna
else:
@@ -221,13 +223,13 @@
def __read_sequence_names(record, handle):
try:
- line = handle.next()
+ line = next(handle)
except StopIteration:
raise ValueError("Unexpected end of stream: Expected to find line starting with 'Sequence name'")
if not line.startswith('Sequence name'):
raise ValueError("Line does not start with 'Sequence name':\n%s" % line)
try:
- line = handle.next()
+ line = next(handle)
except StopIteration:
raise ValueError("Unexpected end of stream: Expected to find line starting with '----'")
if not line.startswith('----'):
@@ -251,7 +253,7 @@
else:
raise ValueError("Unexpected end of stream: Expected to find line starting with 'command'")
line = line.strip()
- line = line.replace('command: ','')
+ line = line.replace('command: ', '')
record.command = line
@@ -279,19 +281,19 @@
def __read_motif_sequences(motif, handle, rv):
try:
- line = handle.next()
+ line = next(handle)
except StopIteration:
raise ValueError('Unexpected end of stream: Failed to find motif sequences')
if not line.startswith('---'):
raise ValueError("Line does not start with '---':\n%s" % line)
try:
- line = handle.next()
+ line = next(handle)
except StopIteration:
raise ValueError("Unexpected end of stream: Expected to find line starting with 'Sequence name'")
if not line.startswith('Sequence name'):
raise ValueError("Line does not start with 'Sequence name':\n%s" % line)
try:
- line = handle.next()
+ line = next(handle)
except StopIteration:
raise ValueError('Unexpected end of stream: Failed to find motif sequences')
if not line.startswith('---'):
@@ -338,13 +340,13 @@
else:
raise ValueError("Unexpected end of stream: Expected to find line starting with 'Time'")
try:
- line = handle.next()
+ line = next(handle)
except StopIteration:
raise ValueError('Unexpected end of stream: Expected to find blank line')
if line.strip():
raise ValueError("Expected blank line, but got:\n%s" % line)
try:
- line = handle.next()
+ line = next(handle)
except StopIteration:
raise ValueError("Unexpected end of stream: Expected to find line starting with '***'")
if not line.startswith('***'):
@@ -356,3 +358,4 @@
raise ValueError("Unexpected end of stream: Expected to find line starting with '***'")
if not line.startswith('***'):
raise ValueError("Line does not start with '***':\n%s" % line)
+
diff -Nru python-biopython-1.62/Bio/Motif/Thresholds.py python-biopython-1.63/Bio/Motif/Thresholds.py
--- python-biopython-1.62/Bio/Motif/Thresholds.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Motif/Thresholds.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,7 +5,7 @@
# as part of this package.
"""Approximate calculation of appropriate thresholds for motif finding
"""
-import math,random
+import math, random
class ScoreDistribution(object):
""" Class representing approximate score distribution for a given motif.
@@ -15,8 +15,8 @@
thresholds for motif occurences.
"""
def __init__(self,motif,precision=10**3):
- self.min_score=min(0.0,motif.min_score())
- self.interval=max(0.0,motif.max_score())-self.min_score
+ self.min_score=min(0.0, motif.min_score())
+ self.interval=max(0.0, motif.max_score())-self.min_score
self.n_points=precision*motif.length
self.step=self.interval/(self.n_points-1)
self.mo_density=[0.0]*self.n_points
@@ -24,27 +24,27 @@
self.bg_density=[0.0]*self.n_points
self.bg_density[-self._index_diff(self.min_score)]=1.0
self.ic=motif.ic()
- for lo,mo in zip(motif.log_odds(),motif.pwm()):
- self.modify(lo,mo,motif.background)
+ for lo, mo in zip(motif.log_odds(), motif.pwm()):
+ self.modify(lo, mo, motif.background)
def _index_diff(self,x,y=0.0):
return int((x-y+0.5*self.step)//self.step)
- def _add(self,i,j):
- return max(0,min(self.n_points-1,i+j))
+ def _add(self, i, j):
+ return max(0, min(self.n_points-1, i+j))
- def modify(self,scores,mo_probs,bg_probs):
+ def modify(self, scores, mo_probs, bg_probs):
mo_new=[0.0]*self.n_points
bg_new=[0.0]*self.n_points
- for k, v in scores.iteritems():
+ for k, v in scores.items():
d=self._index_diff(v)
for i in range(self.n_points):
- mo_new[self._add(i,d)]+=self.mo_density[i]*mo_probs[k]
- bg_new[self._add(i,d)]+=self.bg_density[i]*bg_probs[k]
+ mo_new[self._add(i, d)]+=self.mo_density[i]*mo_probs[k]
+ bg_new[self._add(i, d)]+=self.bg_density[i]*bg_probs[k]
self.mo_density=mo_new
self.bg_density=bg_new
- def threshold_fpr(self,fpr):
+ def threshold_fpr(self, fpr):
"""
Approximate the log-odds threshold which makes the type I error (false positive rate).
"""
@@ -55,7 +55,7 @@
prob+=self.bg_density[i]
return self.min_score+i*self.step
- def threshold_fnr(self,fnr):
+ def threshold_fnr(self, fnr):
"""
Approximate the log-odds threshold which makes the type II error (false negative rate).
"""
@@ -78,7 +78,7 @@
fpr+=self.bg_density[i]
fnr-=self.mo_density[i]
if return_rate:
- return self.min_score+i*self.step,fpr
+ return self.min_score+i*self.step, fpr
else:
return self.min_score+i*self.step
diff -Nru python-biopython-1.62/Bio/Motif/_Motif.py python-biopython-1.63/Bio/Motif/_Motif.py
--- python-biopython-1.62/Bio/Motif/_Motif.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Motif/_Motif.py 2013-12-05 14:10:43.000000000 +0000
@@ -4,10 +4,15 @@
# as part of this package.
"""Implementation of sequence motifs (PRIVATE).
"""
+
+from __future__ import print_function
+
+from Bio._py3k import range
+
from Bio.Seq import Seq
from Bio.SubsMat import FreqTable
from Bio.Alphabet import IUPAC
-import math,random
+import math, random
class Motif(object):
"""
@@ -32,19 +37,20 @@
self.name=""
def _check_length(self, len):
+ # TODO - Change parameter name (len clashes with built in function)?
if self.length is None:
self.length = len
elif self.length != len:
- print "len",self.length,self.instances, len
- raise ValueError("You can't change the length of the motif")
+ raise ValueError("You can't change the length of the motif "
+ "%r %r %r" % (self.length, self.instances, len))
- def _check_alphabet(self,alphabet):
+ def _check_alphabet(self, alphabet):
if self.alphabet is None:
self.alphabet=alphabet
elif self.alphabet != alphabet:
raise ValueError("Wrong Alphabet")
- def add_instance(self,instance):
+ def add_instance(self, instance):
"""
adds new instance to the motif
"""
@@ -63,7 +69,7 @@
self._log_odds_is_current = False
- def set_mask(self,mask):
+ def set_mask(self, mask):
"""
sets the mask for the motif
@@ -90,7 +96,7 @@
return self._pwm
#we need to compute new pwm
self._pwm = []
- for i in xrange(self.length):
+ for i in range(self.length):
dict = {}
#filling the dict with 0's
for letter in self.alphabet.letters:
@@ -110,7 +116,7 @@
dict[seq[i]]+=1
except KeyError: #we need to ignore non-alphabet letters
pass
- self._pwm.append(FreqTable.FreqTable(dict,FreqTable.COUNT,self.alphabet))
+ self._pwm.append(FreqTable.FreqTable(dict, FreqTable.COUNT, self.alphabet))
self._pwm_is_current=1
return self._pwm
@@ -124,10 +130,10 @@
#we need to compute new pwm
self._log_odds = []
pwm=self.pwm(laplace)
- for i in xrange(self.length):
+ for i in range(self.length):
d = {}
for a in self.alphabet.letters:
- d[a]=math.log(pwm[i][a]/self.background[a],2)
+ d[a]=math.log(pwm[i][a]/self.background[a], 2)
self._log_odds.append(d)
self._log_odds_is_current=1
return self._log_odds
@@ -141,7 +147,7 @@
res+=2
for a in self.alphabet.letters:
if pwm[i][a]!=0:
- res+=pwm[i][a]*math.log(pwm[i][a],2)
+ res+=pwm[i][a]*math.log(pwm[i][a], 2)
return res
def exp_score(self,st_dev=False):
@@ -156,25 +162,25 @@
ex2=0.0
for a in self.alphabet.letters:
if pwm[i][a]!=0:
- ex1+=pwm[i][a]*(math.log(pwm[i][a],2)-math.log(self.background[a],2))
- ex2+=pwm[i][a]*(math.log(pwm[i][a],2)-math.log(self.background[a],2))**2
+ ex1+=pwm[i][a]*(math.log(pwm[i][a], 2)-math.log(self.background[a], 2))
+ ex2+=pwm[i][a]*(math.log(pwm[i][a], 2)-math.log(self.background[a], 2))**2
exs+=ex1
var+=ex2-ex1**2
if st_dev:
- return exs,math.sqrt(var)
+ return exs, math.sqrt(var)
else:
return exs
- def search_instances(self,sequence):
+ def search_instances(self, sequence):
"""
a generator function, returning found positions of instances of the motif in a given sequence
"""
if not self.has_instances:
raise ValueError ("This motif has no instances")
- for pos in xrange(0,len(sequence)-self.length+1):
+ for pos in range(0, len(sequence)-self.length+1):
for instance in self.instances:
if instance.tostring()==sequence[pos:pos+self.length].tostring():
- yield(pos,instance)
+ yield(pos, instance)
break # no other instance will fit (we don't want to return multiple hits)
def score_hit(self,sequence,position,normalized=0,masked=0):
@@ -183,7 +189,7 @@
"""
lo=self.log_odds()
score = 0.0
- for pos in xrange(self.length):
+ for pos in range(self.length):
a = sequence[position+pos]
if not masked or self.mask[pos]:
try:
@@ -205,14 +211,14 @@
rc = self.reverse_complement()
sequence=sequence.tostring().upper()
- for pos in xrange(0,len(sequence)-self.length+1):
- score = self.score_hit(sequence,pos,normalized,masked)
+ for pos in range(0, len(sequence)-self.length+1):
+ score = self.score_hit(sequence, pos, normalized, masked)
if score > threshold:
- yield (pos,score)
+ yield (pos, score)
if both:
- rev_score = rc.score_hit(sequence,pos,normalized,masked)
+ rev_score = rc.score_hit(sequence, pos, normalized, masked)
if rev_score > threshold:
- yield (-pos,rev_score)
+ yield (-pos, rev_score)
def dist_pearson(self, motif, masked = 0):
"""
@@ -225,26 +231,26 @@
raise ValueError("Cannot compare motifs with different alphabets")
max_p=-2
- for offset in range(-self.length+1,motif.length):
+ for offset in range(-self.length+1, motif.length):
if offset<0:
- p = self.dist_pearson_at(motif,-offset)
+ p = self.dist_pearson_at(motif, -offset)
else: #offset>=0
- p = motif.dist_pearson_at(self,offset)
+ p = motif.dist_pearson_at(self, offset)
if max_p=0
- p = other.dist_product_at(self,offset)
+ p = other.dist_product_at(self, offset)
if max_p=0
- d = other.dist_dpq_at(self,offset)
+ d = other.dist_dpq_at(self, offset)
overlap = other.length-offset
- overlap = min(self.length,other.length,overlap)
- out = self.length+other.length-2*overlap
- #print d,1.0*(overlap+out)/overlap,d*(overlap+out)/overlap
+ overlap = min(self.length, other.length, overlap)
+ out = self.length+other.length - 2*overlap
+ #print("%f %f %f" % (d,1.0*(overlap+out)/overlap,d*(overlap+out)/overlap))
#d = d/(2*overlap)
d = (d/(out+overlap))*(2*overlap+out)/(2*overlap)
- #print d
- d_s.append((offset,d))
- if min_d> d:
- min_d=d
- min_o=-offset
- return min_d,min_o#,d_s
+ #print(d)
+ d_s.append((offset, d))
+ if min_d > d:
+ min_d = d
+ min_o = -offset
+ return min_d, min_o #,d_s
- def dist_dpq_at(self,other,offset):
+ def dist_dpq_at(self, other, offset):
"""
calculates the dist_dpq measure with a given offset.
offset should satisfy 0<=offset<=len(self)
"""
- def dpq (f1,f2,alpha):
+ def dpq (f1, f2, alpha):
s=0
for n in alpha.letters:
avg=(f1[n]+f2[n])/2
- s+=f1[n]*math.log(f1[n]/avg,2)+f2[n]*math.log(f2[n]/avg,2)
+ s+=f1[n]*math.log(f1[n]/avg, 2)+f2[n]*math.log(f2[n]/avg, 2)
return math.sqrt(s)
s=0
- for i in range(max(self.length,offset+other.length)):
+ for i in range(max(self.length, offset+other.length)):
f1=self[i]
f2=other[i-offset]
- s+=dpq(f1,f2,self.alphabet)
+ s+=dpq(f1, f2, self.alphabet)
return s
- def _read(self,stream):
+ def _read(self, stream):
"""Reads the motif from the stream (in AlignAce format).
the self.alphabet variable must be set beforehand.
If the last line contains asterisks it is used for setting mask
"""
- while 1:
+ while True:
ln = stream.readline()
if "*" in ln:
self.set_mask(ln.strip("\n\c"))
break
- self.add_instance(Seq(ln.strip(),self.alphabet))
+ self.add_instance(Seq(ln.strip(), self.alphabet))
def __str__(self,masked=False):
""" string representation of a motif.
@@ -366,7 +372,7 @@
str = str + inst.tostring() + "\n"
if masked:
- for i in xrange(self.length):
+ for i in range(self.length):
if self.mask[i]:
str = str + "*"
else:
@@ -384,7 +390,7 @@
else:
return self.length
- def _write(self,stream):
+ def _write(self, stream):
"""
writes the motif to the stream
"""
@@ -400,7 +406,7 @@
if not self.has_instances:
self.make_instances_from_counts()
str = ""
- for i,inst in enumerate(self.instances):
+ for i, inst in enumerate(self.instances):
str = str + ">instance%d\n"%i + inst.tostring() + "\n"
return str
@@ -434,24 +440,24 @@
The instances are fake, but the pwm is accurate.
"""
- return self._from_horiz_matrix(stream,letters="ACGT",make_instances=make_instances)
+ return self._from_horiz_matrix(stream, letters="ACGT", make_instances=make_instances)
def _from_vert_matrix(self,stream,letters=None,make_instances=False):
"""reads a vertical count matrix from stream and fill in the counts.
"""
self.counts = {}
- self.has_counts=True
+ self.has_counts = True
if letters is None:
- letters=self.alphabet.letters
- self.length=0
+ letters = self.alphabet.letters
+ self.length = 0
for i in letters:
- self.counts[i]=[]
+ self.counts[i] = []
for ln in stream.readlines():
- rec=map(float,ln.strip().split())
- for k,v in zip(letters,rec):
+ rec = [float(x) for x in ln.strip().split()]
+ for k, v in zip(letters, rec):
self.counts[k].append(v)
- self.length+=1
+ self.length += 1
self.set_mask("*"*self.length)
if make_instances is True:
self.make_instances_from_counts()
@@ -468,14 +474,14 @@
for i in letters:
ln = stream.readline().strip().split()
#if there is a letter in the beginning, ignore it
- if ln[0]==i:
- ln=ln[1:]
- #print ln
+ if ln[0] == i:
+ ln = ln[1:]
+ #print(ln)
try:
- self.counts[i]=map(int,ln)
+ self.counts[i] = [int(x) for x in ln]
except ValueError: #not integers
- self.counts[i]=map(float,ln) #map(lambda s: int(100*float(s)),ln)
- #print counts[i]
+ self.counts[i] = [float(x) for x in ln]
+ #print(counts[i])
s = sum(self.counts[nuc][0] for nuc in letters)
l = len(self.counts[letters[0]])
@@ -492,27 +498,27 @@
In case the sums of counts are different for different columnes, the
shorter columns are padded with background.
"""
- alpha="".join(self.alphabet.letters)
+ alpha = "".join(self.alphabet.letters)
#col[i] is a column taken from aligned motif instances
- col=[]
- self.has_instances=True
- self.instances=[]
- s = sum(map(lambda nuc: self.counts[nuc][0],self.alphabet.letters))
+ col = []
+ self.has_instances = True
+ self.instances = []
+ s = sum(self.counts[nuc][0] for nuc in self.alphabet.letters)
for i in range(self.length):
col.append("")
for n in self.alphabet.letters:
- col[i] = col[i]+ (n*(self.counts[n][i]))
- if len(col[i])>> from Bio import Motif
- >>> for motif in Motif.parse(open("Motif/alignace.out"),"AlignAce"):
- ... print motif.consensus()
+ >>> for motif in Motif.parse(open("Motif/alignace.out"), "AlignAce"):
+ ... print(motif.consensus())
TCTACGATTGAG
CTGCACCTAGCTACGAGTGAG
GTGCCCTAAGCATACTAGGCG
@@ -93,7 +95,7 @@
for m in parser(handle).motifs:
yield m
-def read(handle,format):
+def read(handle, format):
"""Reads a motif from a handle using a specified file-format.
This supports the same formats as Bio.Motif.parse(), but
@@ -101,14 +103,14 @@
reading a pfm file:
>>> from Bio import Motif
- >>> motif = Motif.read(open("Motif/SRF.pfm"),"jaspar-pfm")
+ >>> motif = Motif.read(open("Motif/SRF.pfm"), "jaspar-pfm")
>>> motif.consensus()
Seq('GCCCATATATGG', IUPACUnambiguousDNA())
Or a single-motif MEME file,
>>> from Bio import Motif
- >>> motif = Motif.read(open("Motif/meme.out"),"MEME")
+ >>> motif = Motif.read(open("Motif/meme.out"), "MEME")
>>> motif.consensus()
Seq('CTCAATCGTA', IUPACUnambiguousDNA())
@@ -116,7 +118,7 @@
an exception is raised:
>>> from Bio import Motif
- >>> motif = Motif.read(open("Motif/alignace.out"),"AlignAce")
+ >>> motif = Motif.read(open("Motif/alignace.out"), "AlignAce")
Traceback (most recent call last):
...
ValueError: More than one motif found in handle
@@ -126,7 +128,7 @@
shown in the example above). Instead use:
>>> from Bio import Motif
- >>> motif = Motif.parse(open("Motif/alignace.out"),"AlignAce").next()
+ >>> motif = next(Motif.parse(open("Motif/alignace.out"), "AlignAce"))
>>> motif.consensus()
Seq('TCTACGATTGAG', IUPACUnambiguousDNA())
@@ -135,13 +137,13 @@
"""
iterator = parse(handle, format)
try:
- first = iterator.next()
+ first = next(iterator)
except StopIteration:
first = None
if first is None:
raise ValueError("No motifs found in handle")
try:
- second = iterator.next()
+ second = next(iterator)
except StopIteration:
second = None
if second is not None:
@@ -157,14 +159,14 @@
"""
import doctest
import os
- if os.path.isdir(os.path.join("..","..","Tests")):
- print "Runing doctests..."
+ if os.path.isdir(os.path.join("..", "..", "Tests")):
+ print("Runing doctests...")
cur_dir = os.path.abspath(os.curdir)
- os.chdir(os.path.join("..","..","Tests"))
+ os.chdir(os.path.join("..", "..", "Tests"))
doctest.testmod()
os.chdir(cur_dir)
del cur_dir
- print "Done"
+ print("Done")
if __name__ == "__main__":
#Run the doctests
diff -Nru python-biopython-1.62/Bio/NMR/NOEtools.py python-biopython-1.63/Bio/NMR/NOEtools.py
--- python-biopython-1.62/Bio/NMR/NOEtools.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/NMR/NOEtools.py 2013-12-05 14:10:43.000000000 +0000
@@ -6,7 +6,7 @@
# peaklist with predicted crosspeaks directly from the
# input assignment peaklist.
-import xpktools
+from . import xpktools
def predictNOE(peaklist, originNuc, detectedNuc, originResNum, toResNum):
diff -Nru python-biopython-1.62/Bio/NMR/__init__.py python-biopython-1.63/Bio/NMR/__init__.py
--- python-biopython-1.62/Bio/NMR/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/NMR/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Code for working with NMR data
This directory currently contains contributions from
diff -Nru python-biopython-1.62/Bio/NMR/xpktools.py python-biopython-1.63/Bio/NMR/xpktools.py
--- python-biopython-1.62/Bio/NMR/xpktools.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/NMR/xpktools.py 2013-12-05 14:10:43.000000000 +0000
@@ -8,6 +8,8 @@
# provides methods for extracting data by the field name
# which is listed in the last line of the peaklist header.
+from __future__ import print_function
+
import sys
# * * * * * INITIALIZATIONS * * * * *
@@ -37,7 +39,7 @@
try:
self.fields["entrynum"] = datlist[0]
- except IndexError, e:
+ except IndexError as e:
pass
@@ -47,23 +49,18 @@
# The data lines are available as a list
def __init__(self, infn):
- self.data = [] # init the data line list
+ with open(infn, 'r') as infile:
- infile = open(infn, 'r')
+ # Read in the header lines
+ self.firstline = infile.readline().split("\012")[0]
+ self.axislabels = infile.readline().split("\012")[0]
+ self.dataset = infile.readline().split("\012")[0]
+ self.sw = infile.readline().split("\012")[0]
+ self.sf = infile.readline().split("\012")[0]
+ self.datalabels = infile.readline().split("\012")[0]
- # Read in the header lines
- self.firstline = infile.readline().split("\012")[0]
- self.axislabels = infile.readline().split("\012")[0]
- self.dataset = infile.readline().split("\012")[0]
- self.sw = infile.readline().split("\012")[0]
- self.sf = infile.readline().split("\012")[0]
- self.datalabels = infile.readline().split("\012")[0]
-
- # Read in the data lines to a list
- line = infile.readline()
- while line:
- self.data.append(line.split("\012")[0])
- line = infile.readline()
+ # Read in the data lines to a list
+ self.data = [line.split("\012")[0] for line in infile]
def residue_dict(self, index):
# Generate a dictionary idexed by residue number or a nucleus
@@ -106,40 +103,19 @@
return self.dict
def write_header(self, outfn):
- outfile = _try_open_write(outfn)
- outfile.write(self.firstline)
- outfile.write("\012")
- outfile.write(self.axislabels)
- outfile.write("\012")
- outfile.write(self.dataset)
- outfile.write("\012")
- outfile.write(self.sw)
- outfile.write("\012")
- outfile.write(self.sf)
- outfile.write("\012")
- outfile.write(self.datalabels)
- outfile.write("\012")
- outfile.close()
-
-
-def _try_open_read(fn):
- # Try to open a file for reading. Exit on IOError
- try:
- infile = open(fn, 'r')
- except IOError, e:
- print "file", fn, "could not be opened for reading - quitting."
- sys.exit(0)
- return infile
-
-
-def _try_open_write(fn):
- # Try to open a file for writing. Exit on IOError
- try:
- infile = open(fn, 'w')
- except IOError, e:
- print "file", fn, "could not be opened for writing - quitting."
- sys.exit(0)
- return infile
+ with open(outfn, 'wb') as outfile:
+ outfile.write(self.firstline)
+ outfile.write("\012")
+ outfile.write(self.axislabels)
+ outfile.write("\012")
+ outfile.write(self.dataset)
+ outfile.write("\012")
+ outfile.write(self.sw)
+ outfile.write("\012")
+ outfile.write(self.sf)
+ outfile.write("\012")
+ outfile.write(self.datalabels)
+ outfile.write("\012")
def replace_entry(line, fieldn, newentry):
@@ -231,12 +207,6 @@
return outlist
-def _sort_keys(dictionary):
- keys = dictionary.keys()
- sorted_keys = keys.sort()
- return sorted_keys
-
-
def _read_dicts(fn_list, keyatom):
# Read multiple files into a list of residue dictionaries
dict_list = []
diff -Nru python-biopython-1.62/Bio/NaiveBayes.py python-biopython-1.63/Bio/NaiveBayes.py
--- python-biopython-1.62/Bio/NaiveBayes.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/NaiveBayes.py 2013-12-05 14:10:43.000000000 +0000
@@ -26,6 +26,8 @@
"""
+from __future__ import print_function
+
import numpy
@@ -156,7 +158,7 @@
nb.classes = list(set(results))
else:
class_freq = _contents(results)
- nb.classes = class_freq.keys()
+ nb.classes = list(class_freq.keys())
percs = class_freq
nb.classes.sort() # keep it tidy
@@ -230,6 +232,6 @@
carmodel = train(xcar, ycar)
carresult = classify(carmodel, ['Red', 'Sports', 'Domestic'])
- print 'Is Yes?', carresult
+ print('Is Yes? %s' % carresult)
carresult = classify(carmodel, ['Red', 'SUV', 'Domestic'])
- print 'Is No?', carresult
+ print('Is No? %s' % carresult)
diff -Nru python-biopython-1.62/Bio/NeuralNetwork/BackPropagation/Layer.py python-biopython-1.63/Bio/NeuralNetwork/BackPropagation/Layer.py
--- python-biopython-1.62/Bio/NeuralNetwork/BackPropagation/Layer.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/NeuralNetwork/BackPropagation/Layer.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Model a single layer in a nueral network.
These classes deal with a layers in the neural network (ie. the input layer,
@@ -7,6 +12,8 @@
import math
import random
+from Bio._py3k import range
+
def logistic_function(value):
"""Transform the value with the logistic function.
@@ -37,7 +44,7 @@
else:
lower_range = 1
- self.nodes = range(lower_range, num_nodes + 1)
+ self.nodes = list(range(lower_range, num_nodes + 1))
self.weights = {}
@@ -104,7 +111,7 @@
o inputs -- A list of inputs into the network -- this must be
equal to the number of nodes in the layer.
"""
- if len(inputs) != len(self.values.keys()) - 1:
+ if len(inputs) != len(self.values) - 1:
raise ValueError("Inputs do not match input layer nodes.")
# set the node values from the inputs
diff -Nru python-biopython-1.62/Bio/NeuralNetwork/BackPropagation/Network.py python-biopython-1.63/Bio/NeuralNetwork/BackPropagation/Network.py
--- python-biopython-1.62/Bio/NeuralNetwork/BackPropagation/Network.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/NeuralNetwork/BackPropagation/Network.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Represent Neural Networks.
This module contains classes to represent Generic Neural Networks that
@@ -59,7 +64,7 @@
of the prevoious weight change to use.
"""
num_iterations = 0
- while 1:
+ while True:
num_iterations += 1
training_error = 0.0
for example in training_examples:
@@ -103,10 +108,7 @@
# update the predicted values for these inputs
self._input.update(inputs)
- output_keys = self._output.values.keys()
- output_keys.sort()
-
outputs = []
- for output_key in output_keys:
+ for output_key in sorted(self._output.values):
outputs.append(self._output.values[output_key])
return outputs
diff -Nru python-biopython-1.62/Bio/NeuralNetwork/Gene/Motif.py python-biopython-1.63/Bio/NeuralNetwork/Gene/Motif.py
--- python-biopython-1.62/Bio/NeuralNetwork/Gene/Motif.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/NeuralNetwork/Gene/Motif.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Find and deal with motifs in biological sequence data.
Representing DNA (or RNA or proteins) in a neural network can be difficult
@@ -12,7 +17,7 @@
from Bio.Seq import Seq
# local modules
-from Pattern import PatternRepository
+from .Pattern import PatternRepository
class MotifFinder(object):
@@ -201,7 +206,7 @@
# as long as we have some motifs present, normalize them
# otherwise we'll just return 0 for everything
if max_count > 0:
- for motif in seq_motifs.keys():
+ for motif in seq_motifs:
seq_motifs[motif] = (float(seq_motifs[motif] - min_count)
/ float(max_count))
diff -Nru python-biopython-1.62/Bio/NeuralNetwork/Gene/Pattern.py python-biopython-1.63/Bio/NeuralNetwork/Gene/Pattern.py
--- python-biopython-1.62/Bio/NeuralNetwork/Gene/Pattern.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/NeuralNetwork/Gene/Pattern.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Generic functionality useful for all gene representations.
This module contains classes which can be used for all the different
@@ -76,7 +81,7 @@
"""
all_patterns = []
- while 1:
+ while True:
cur_line = input_handle.readline()
if not(cur_line):
@@ -90,7 +95,7 @@
if self._alphabet is not None:
# make single patterns (not signatures) into lists, so we
# can check signatures and single patterns the same
- if type(cur_pattern) != type(tuple([])):
+ if not isinstance(cur_pattern, tuple):
test_pattern = [cur_pattern]
else:
test_pattern = cur_pattern
diff -Nru python-biopython-1.62/Bio/NeuralNetwork/Gene/Schema.py python-biopython-1.63/Bio/NeuralNetwork/Gene/Schema.py
--- python-biopython-1.62/Bio/NeuralNetwork/Gene/Schema.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/NeuralNetwork/Gene/Schema.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Deal with Motifs or Signatures allowing ambiguity in the sequences.
This class contains Schema which deal with Motifs and Signatures at
@@ -11,15 +16,18 @@
motifs or signatures.
"""
# standard modules
+from __future__ import print_function
+
import random
import re
-# biopython
+from Bio._py3k import range
+
from Bio import Alphabet
from Bio.Seq import MutableSeq
# neural network libraries
-from Pattern import PatternRepository
+from .Pattern import PatternRepository
# genetic algorithm libraries
from Bio.GA import Organism
@@ -605,7 +613,7 @@
assert total_count > 0, "Expected to have motifs to match"
while (float(matched_count) / float(total_count)) < motif_percent:
new_schema, matching_motifs = \
- self._get_unique_schema(schema_info.keys(),
+ self._get_unique_schema(list(schema_info.keys()),
all_motifs, num_ambiguous)
# get the number of counts for the new schema and clean up
@@ -650,7 +658,7 @@
# doesn't match any old schema
num_tries = 0
- while 1:
+ while True:
# pick a motif to work from and make a schema from it
cur_motif = random.choice(motif_list)
@@ -704,8 +712,8 @@
new_schema_list = list(motif)
for add_ambiguous in range(num_ambiguous):
# add an ambiguous position in a new place in the motif
- while 1:
- ambig_pos = random.choice(range(len(new_schema_list)))
+ while True:
+ ambig_pos = random.choice(list(range(len(new_schema_list))))
# only add a position if it isn't already ambiguous
# otherwise, we'll try again
diff -Nru python-biopython-1.62/Bio/NeuralNetwork/Gene/Signature.py python-biopython-1.63/Bio/NeuralNetwork/Gene/Signature.py
--- python-biopython-1.62/Bio/NeuralNetwork/Gene/Signature.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/NeuralNetwork/Gene/Signature.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Find and deal with signatures in biological sequence data.
In addition to representing sequences according to motifs (see Motif.py
@@ -11,7 +16,7 @@
from Bio.Seq import Seq
# local stuff
-from Pattern import PatternRepository
+from .Pattern import PatternRepository
class SignatureFinder(object):
diff -Nru python-biopython-1.62/Bio/NeuralNetwork/StopTraining.py python-biopython-1.63/Bio/NeuralNetwork/StopTraining.py
--- python-biopython-1.62/Bio/NeuralNetwork/StopTraining.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/NeuralNetwork/StopTraining.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Classes to help deal with stopping training a neural network.
One of the key issues with training a neural network is knowning when to
@@ -13,6 +18,8 @@
"""
+from __future__ import print_function
+
class ValidationIncreaseStop(object):
"""Class to stop training on a network when the validation error increases.
@@ -51,20 +58,20 @@
"""
if num_iterations % 10 == 0:
if self.verbose:
- print "%s; Training Error:%s; Validation Error:%s"\
- % (num_iterations, training_error, validation_error)
+ print("%s; Training Error:%s; Validation Error:%s"\
+ % (num_iterations, training_error, validation_error))
if num_iterations > self.min_iterations:
if self.last_error is not None:
if validation_error > self.last_error:
if self.verbose:
- print "Validation Error increasing -- Stop"
+ print("Validation Error increasing -- Stop")
return 1
if self.max_iterations is not None:
if num_iterations > self.max_iterations:
if self.verbose:
- print "Reached maximum number of iterations -- Stop"
+ print("Reached maximum number of iterations -- Stop")
return 1
self.last_error = validation_error
diff -Nru python-biopython-1.62/Bio/NeuralNetwork/Training.py python-biopython-1.63/Bio/NeuralNetwork/Training.py
--- python-biopython-1.62/Bio/NeuralNetwork/Training.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/NeuralNetwork/Training.py 2013-12-05 14:10:43.000000000 +0000
@@ -1,3 +1,8 @@
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+#
+
"""Provide classes for dealing with Training Neural Networks.
"""
# standard modules
diff -Nru python-biopython-1.62/Bio/Nexus/Nexus.py python-biopython-1.63/Bio/Nexus/Nexus.py
--- python-biopython-1.62/Bio/Nexus/Nexus.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Nexus/Nexus.py 2013-12-05 14:10:43.000000000 +0000
@@ -9,9 +9,13 @@
Based upon 'NEXUS: An extensible file format for systematic information'
Maddison, Swofford, Maddison. 1997. Syst. Biol. 46(4):590-621
"""
-# For with in Python/Jython 2.5
-from __future__ import with_statement
+from __future__ import print_function
+from Bio._py3k import zip
+from Bio._py3k import range
+from Bio._py3k import basestring
+
+from functools import reduce
import copy
import math
import random
@@ -22,21 +26,22 @@
from Bio.Data import IUPACData
from Bio.Seq import Seq
-from Trees import Tree
+from .Trees import Tree
-INTERLEAVE=70
-SPECIAL_COMMANDS=['charstatelabels','charlabels','taxlabels', 'taxset', 'charset','charpartition','taxpartition',
- 'matrix','tree', 'utree','translate','codonposset','title']
-KNOWN_NEXUS_BLOCKS = ['trees','data', 'characters', 'taxa', 'sets','codons']
-PUNCTUATION='()[]{}/\,;:=*\'"`+-<>'
-MRBAYESSAFE='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_'
-WHITESPACE=' \t\n'
-#SPECIALCOMMENTS=['!','&','%','/','\\','@'] #original list of special comments
-SPECIALCOMMENTS=['&'] # supported special comment ('tree' command), all others are ignored
-CHARSET='chars'
-TAXSET='taxa'
-CODONPOSITIONS='codonpositions'
-DEFAULTNEXUS='#NEXUS\nbegin data; dimensions ntax=0 nchar=0; format datatype=dna; end; '
+INTERLEAVE = 70
+SPECIAL_COMMANDS = ['charstatelabels', 'charlabels', 'taxlabels', 'taxset',
+ 'charset', 'charpartition', 'taxpartition', 'matrix',
+ 'tree', 'utree', 'translate', 'codonposset', 'title']
+KNOWN_NEXUS_BLOCKS = ['trees', 'data', 'characters', 'taxa', 'sets', 'codons']
+PUNCTUATION = '()[]{}/\,;:=*\'"`+-<>'
+MRBAYESSAFE = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_'
+WHITESPACE = ' \t\n'
+#SPECIALCOMMENTS = ['!','&','%','/','\\','@'] #original list of special comments
+SPECIALCOMMENTS = ['&'] # supported special comment ('tree' command), all others are ignored
+CHARSET = 'chars'
+TAXSET = 'taxa'
+CODONPOSITIONS = 'codonpositions'
+DEFAULTNEXUS = '#NEXUS\nbegin data; dimensions ntax=0 nchar=0; format datatype=dna; end; '
class NexusError(Exception):
@@ -44,12 +49,15 @@
class CharBuffer(object):
- """Helps reading NEXUS-words and characters from a buffer."""
- def __init__(self,string):
+ """Helps reading NEXUS-words and characters from a buffer (semi-PRIVATE).
+
+ This class is not intended for public use (any more).
+ """
+ def __init__(self, string):
if string:
- self.buffer=list(string)
+ self.buffer = list(string)
else:
- self.buffer=[]
+ self.buffer = []
def peek(self):
if self.buffer:
@@ -58,21 +66,26 @@
return None
def peek_nonwhitespace(self):
- b=''.join(self.buffer).strip()
+ b = ''.join(self.buffer).strip()
if b:
return b[0]
else:
return None
- def next(self):
+ def __next__(self):
if self.buffer:
return self.buffer.pop(0)
else:
return None
+ if sys.version_info[0] < 3:
+ def next(self):
+ """Deprecated Python 2 style alias for Python 3 style __next__ method."""
+ return self.__next__()
+
def next_nonwhitespace(self):
while True:
- p=self.next()
+ p = next(self)
if p is None:
break
if p not in WHITESPACE:
@@ -81,23 +94,23 @@
def skip_whitespace(self):
while self.buffer[0] in WHITESPACE:
- self.buffer=self.buffer[1:]
+ self.buffer = self.buffer[1:]
- def next_until(self,target):
+ def next_until(self, target):
for t in target:
try:
- pos=self.buffer.index(t)
+ pos = self.buffer.index(t)
except ValueError:
pass
else:
- found=''.join(self.buffer[:pos])
- self.buffer=self.buffer[pos:]
+ found = ''.join(self.buffer[:pos])
+ self.buffer = self.buffer[pos:]
return found
else:
return None
- def peek_word(self,word):
- return ''.join(self.buffer[:len(word)])==word
+ def peek_word(self, word):
+ return ''.join(self.buffer[:len(word)]) == word
def next_word(self):
"""Return the next NEXUS word from a string.
@@ -105,32 +118,33 @@
This deals with single and double quotes, whitespace and punctuation.
"""
- word=[]
- quoted=False
- first=self.next_nonwhitespace() # get first character
+ word = []
+ quoted = False
+ first = self.next_nonwhitespace() # get first character
if not first: # return empty if only whitespace left
return None
word.append(first)
- if first=="'": # word starts with a quote
- quoted="'"
- elif first=='"':
- quoted='"'
+ if first == "'": # word starts with a quote
+ quoted = "'"
+ elif first == '"':
+ quoted = '"'
elif first in PUNCTUATION: # if it's punctuation, return immediately
return first
while True:
- c=self.peek()
- if c==quoted: # a quote?
- word.append(self.next()) # store quote
- if self.peek()==quoted: # double quote
- skip=self.next() # skip second quote
+ c = self.peek()
+ if c == quoted: # a quote?
+ word.append(next(self)) # store quote
+ if self.peek() == quoted: # double quote
+ skip = next(self) # skip second quote
elif quoted: # second single quote ends word
break
elif quoted:
- word.append(self.next()) # if quoted, then add anything
- elif not c or c in PUNCTUATION or c in WHITESPACE: # if not quoted and special character, stop
+ word.append(next(self)) # if quoted, then add anything
+ elif not c or c in PUNCTUATION or c in WHITESPACE:
+ # if not quoted and special character, stop
break
else:
- word.append(self.next()) # standard character
+ word.append(next(self)) # standard character
return ''.join(word)
def rest(self):
@@ -144,65 +158,64 @@
See Wheeler (1990), Cladistics 6:269-275.
"""
- def __init__(self,symbols,gap):
- self.data={}
- self.symbols=[s for s in symbols]
- self.symbols.sort()
+ def __init__(self, symbols, gap):
+ self.data = {}
+ self.symbols = sorted(symbols)
if gap:
self.symbols.append(gap)
for x in self.symbols:
- for y in [s for s in self.symbols if s!=x]:
- self.set(x,y,0)
+ for y in [s for s in self.symbols if s != x]:
+ self.set(x, y, 0)
- def set(self,x,y,value):
- if x>y:
- x,y=y,x
- self.data[x+y]=value
-
- def add(self,x,y,value):
- if x>y:
- x,y=y,x
- self.data[x+y]+=value
+ def set(self, x, y, value):
+ if x > y:
+ x, y = y, x
+ self.data[x + y] = value
+
+ def add(self, x, y, value):
+ if x > y:
+ x, y = y, x
+ self.data[x + y] += value
def sum(self):
- return reduce(lambda x,y:x+y,self.data.values())
+ return reduce(lambda x, y:x+y, self.data.values())
def transformation(self):
- total=self.sum()
- if total!=0:
+ total = self.sum()
+ if total != 0:
for k in self.data:
- self.data[k]=self.data[k]/float(total)
+ self.data[k] = self.data[k] / float(total)
return self
def weighting(self):
for k in self.data:
- if self.data[k]!=0:
- self.data[k]=-math.log(self.data[k])
+ if self.data[k] != 0:
+ self.data[k] = -math.log(self.data[k])
return self
- def smprint(self,name='your_name_here'):
- matrix='usertype %s stepmatrix=%d\n' % (name,len(self.symbols))
- matrix+=' %s\n' % ' '.join(self.symbols)
+ def smprint(self, name='your_name_here'):
+ matrix = 'usertype %s stepmatrix=%d\n' % (name, len(self.symbols))
+ matrix += ' %s\n' % ' '.join(self.symbols)
for x in self.symbols:
- matrix+='[%s]'.ljust(8) % x
+ matrix += '[%s]'.ljust(8) % x
for y in self.symbols:
- if x==y:
- matrix+=' . '
+ if x == y:
+ matrix += ' . '
else:
- if x>y:
- x1,y1=y,x
+ if x > y:
+ x1, y1 = y, x
else:
- x1,y1=x,y
- if self.data[x1+y1]==0:
- matrix+='inf. '
+ x1, y1 = x, y
+ if self.data[x1 + y1] == 0:
+ matrix += 'inf. '
else:
- matrix+='%2.2f'.ljust(10) % (self.data[x1+y1])
- matrix+='\n'
- matrix+=';\n'
+ matrix += '%2.2f'.ljust(10) % (self.data[x1 + y1])
+ matrix += '\n'
+ matrix += ';\n'
return matrix
-def safename(name,mrbayes=False):
+def safename(name, mrbayes=False):
"""Return a taxon identifier according to NEXUS standard.
Wrap quotes around names with punctuation or whitespace, and double
@@ -212,12 +225,12 @@
for the mrbayes software package.
"""
if mrbayes:
- safe=name.replace(' ','_')
- safe=''.join([c for c in safe if c in MRBAYESSAFE])
+ safe = name.replace(' ', '_')
+ safe = ''.join(c for c in safe if c in MRBAYESSAFE)
else:
- safe=name.replace("'","''")
+ safe = name.replace("'", "''")
if set(safe).intersection(set(WHITESPACE+PUNCTUATION)):
- safe="'"+safe+"'"
+ safe = "'" + safe + "'"
return safe
@@ -226,58 +239,54 @@
if not word:
return None
while (word.startswith("'") and word.endswith("'")) or (word.startswith('"') and word.endswith('"')):
- word=word[1:-1]
+ word = word[1:-1]
return word
-def get_start_end(sequence, skiplist=['-','?']):
+def get_start_end(sequence, skiplist=['-', '?']):
"""Return position of first and last character which is not in skiplist.
Skiplist defaults to ['-','?'])."""
- length=len(sequence)
- if length==0:
- return None,None
- end=length-1
- while end>=0 and (sequence[end] in skiplist):
- end-=1
- start=0
- while start= 0 and (sequence[end] in skiplist):
+ end -= 1
+ start = 0
+ while start < length and (sequence[start] in skiplist):
+ start += 1
+ if start == length and end == -1: # empty sequence
+ return -1, -1
else:
- return start,end
+ return start, end
def _sort_keys_by_values(p):
"""Returns a sorted list of keys of p sorted by values of p."""
- startpos=[(p[pn],pn) for pn in p if p[pn]]
- startpos.sort()
- # parenthisis added because of py3k
- return (zip(*startpos))[1]
+ return sorted((pn for pn in p if p[pn]), key = lambda pn: p[pn])
def _make_unique(l):
"""Check that all values in list are unique and return a pruned and sorted list."""
- l=list(set(l))
- l.sort()
- return l
+ return sorted(set(l))
-def _unique_label(previous_labels,label):
+def _unique_label(previous_labels, label):
"""Returns a unique name if label is already in previous_labels."""
while label in previous_labels:
if label.split('.')[-1].startswith('copy'):
- label='.'.join(label.split('.')[:-1])+'.copy'+str(eval('0'+label.split('.')[-1][4:])+1)
+ label = '.'.join(label.split('.')[:-1]) \
+ + '.copy' + str(eval('0'+label.split('.')[-1][4:])+1)
else:
- label+='.copy'
+ label += '.copy'
return label
def _seqmatrix2strmatrix(matrix):
"""Converts a Seq-object matrix to a plain sequence-string matrix."""
- return dict([(t, str(matrix[t])) for t in matrix])
+ return dict((t, str(matrix[t])) for t in matrix)
def _compact4nexus(orig_list):
@@ -287,30 +296,29 @@
if not orig_list:
return ''
- orig_list=list(set(orig_list))
- orig_list.sort()
- shortlist=[]
- clist=orig_list[:]
- clist.append(clist[-1]+.5) # dummy value makes it easier
- while len(clist)>1:
- step=1
- for i,x in enumerate(clist):
- if x==clist[0]+i*step: # are we still in the right step?
+ orig_list = sorted(set(orig_list))
+ shortlist = []
+ clist = orig_list[:]
+ clist.append(clist[-1] + .5) # dummy value makes it easier
+ while len(clist) > 1:
+ step = 1
+ for i, x in enumerate(clist):
+ if x == clist[0] + i*step: # are we still in the right step?
continue
- elif i==1 and len(clist)>3 and clist[i+1]-x==x-clist[0]:
+ elif i == 1 and len(clist) > 3 and clist[i+1] - x == x - clist[0]:
# second element, and possibly at least 3 elements to link,
# and the next one is in the right step
- step=x-clist[0]
+ step = x - clist[0]
else: # pattern broke, add all values before current position to new list
- sub=clist[:i]
- if len(sub)==1:
+ sub = clist[:i]
+ if len(sub) == 1:
shortlist.append(str(sub[0]+1))
else:
- if step==1:
- shortlist.append('%d-%d' % (sub[0]+1,sub[-1]+1))
+ if step == 1:
+ shortlist.append('%d-%d' % (sub[0]+1, sub[-1]+1))
else:
- shortlist.append('%d-%d\\%d' % (sub[0]+1,sub[-1]+1,step))
- clist=clist[i:]
+ shortlist.append('%d-%d\\%d' % (sub[0]+1, sub[-1]+1, step))
+ clist = clist[i:]
break
return ' '.join(shortlist)
@@ -325,64 +333,66 @@
if not matrices:
return None
- name=matrices[0][0]
- combined=copy.deepcopy(matrices[0][1]) # initiate with copy of first matrix
- mixed_datatypes=(len(set([n[1].datatype for n in matrices]))>1)
+ name = matrices[0][0]
+ combined = copy.deepcopy(matrices[0][1]) # initiate with copy of first matrix
+ mixed_datatypes = (len(set(n[1].datatype for n in matrices)) > 1)
if mixed_datatypes:
- combined.datatype='None' # dealing with mixed matrices is application specific. You take care of that yourself!
+ # dealing with mixed matrices is application specific.
+ # You take care of that yourself!
+ combined.datatype = 'None'
# raise NexusError('Matrices must be of same datatype')
- combined.charlabels=None
- combined.statelabels=None
- combined.interleave=False
- combined.translate=None
+ combined.charlabels = None
+ combined.statelabels = None
+ combined.interleave = False
+ combined.translate = None
# rename taxon sets and character sets and name them with prefix
- for cn,cs in combined.charsets.iteritems():
- combined.charsets['%s.%s' % (name,cn)]=cs
+ for cn, cs in combined.charsets.items():
+ combined.charsets['%s.%s' % (name, cn)]=cs
del combined.charsets[cn]
- for tn,ts in combined.taxsets.iteritems():
- combined.taxsets['%s.%s' % (name,tn)]=ts
+ for tn, ts in combined.taxsets.items():
+ combined.taxsets['%s.%s' % (name, tn)]=ts
del combined.taxsets[tn]
# previous partitions usually don't make much sense in combined matrix
# just initiate one new partition parted by single matrices
- combined.charpartitions={'combined':{name:range(combined.nchar)}}
- for n,m in matrices[1:]: # add all other matrices
- both=[t for t in combined.taxlabels if t in m.taxlabels]
- combined_only=[t for t in combined.taxlabels if t not in both]
- m_only=[t for t in m.taxlabels if t not in both]
+ combined.charpartitions = {'combined':{name:list(range(combined.nchar))}}
+ for n, m in matrices[1:]: # add all other matrices
+ both = [t for t in combined.taxlabels if t in m.taxlabels]
+ combined_only = [t for t in combined.taxlabels if t not in both]
+ m_only = [t for t in m.taxlabels if t not in both]
for t in both:
# concatenate sequences and unify gap and missing character symbols
- combined.matrix[t]+=Seq(str(m.matrix[t]).replace(m.gap,combined.gap).replace(m.missing,combined.missing),combined.alphabet)
+ combined.matrix[t] += Seq(str(m.matrix[t]).replace(m.gap, combined.gap).replace(m.missing, combined.missing), combined.alphabet)
# replace date of missing taxa with symbol for missing data
for t in combined_only:
- combined.matrix[t]+=Seq(combined.missing*m.nchar,combined.alphabet)
+ combined.matrix[t] += Seq(combined.missing*m.nchar, combined.alphabet)
for t in m_only:
- combined.matrix[t]=Seq(combined.missing*combined.nchar,combined.alphabet)+\
- Seq(str(m.matrix[t]).replace(m.gap,combined.gap).replace(m.missing,combined.missing),combined.alphabet)
+ combined.matrix[t] = Seq(combined.missing*combined.nchar, combined.alphabet) + \
+ Seq(str(m.matrix[t]).replace(m.gap, combined.gap).replace(m.missing, combined.missing), combined.alphabet)
combined.taxlabels.extend(m_only) # new taxon list
- for cn,cs in m.charsets.iteritems(): # adjust character sets for new matrix
- combined.charsets['%s.%s' % (n,cn)]=[x+combined.nchar for x in cs]
+ for cn, cs in m.charsets.items(): # adjust character sets for new matrix
+ combined.charsets['%s.%s' % (n, cn)] = [x+combined.nchar for x in cs]
if m.taxsets:
if not combined.taxsets:
- combined.taxsets={}
+ combined.taxsets = {}
# update taxon sets
- combined.taxsets.update(dict(('%s.%s' % (n,tn),ts)
- for tn,ts in m.taxsets.iteritems()))
+ combined.taxsets.update(dict(('%s.%s' % (n, tn), ts)
+ for tn, ts in m.taxsets.items()))
# update new charpartition
- combined.charpartitions['combined'][n]=range(combined.nchar,combined.nchar+m.nchar)
+ combined.charpartitions['combined'][n] = list(range(combined.nchar, combined.nchar+m.nchar))
# update charlabels
if m.charlabels:
if not combined.charlabels:
- combined.charlabels={}
- combined.charlabels.update(dict((combined.nchar+i,label)
- for (i,label) in m.charlabels.iteritems()))
- combined.nchar+=m.nchar # update nchar and ntax
- combined.ntax+=len(m_only)
+ combined.charlabels = {}
+ combined.charlabels.update(dict((combined.nchar + i, label)
+ for (i, label) in m.charlabels.items()))
+ combined.nchar += m.nchar # update nchar and ntax
+ combined.ntax += len(m_only)
# some prefer partitions, some charsets:
# make separate charset for ecah initial dataset
for c in combined.charpartitions['combined']:
- combined.charsets[c]=combined.charpartitions['combined'][c]
+ combined.charsets[c] = combined.charpartitions['combined'][c]
return combined
@@ -401,41 +411,46 @@
NOTE: this function is very slow for large files, and obsolete when using C extension cnexus
"""
- contents=iter(text)
- newtext=[]
- newline=[]
- quotelevel=''
- speciallevel=False
- commlevel=0
+ contents = iter(text)
+ newtext = []
+ newline = []
+ quotelevel = ''
+ speciallevel = False
+ commlevel = 0
#Parse with one character look ahead (for special comments)
- t2 = contents.next()
+ t2 = next(contents)
while True:
t = t2
try:
- t2 = contents.next()
+ t2 = next(contents)
except StopIteration:
t2 = None
if t is None:
break
- if t==quotelevel and not (commlevel or speciallevel): # matching quote ends quotation
- quotelevel=''
- elif not quotelevel and not (commlevel or speciallevel) and (t=='"' or t=="'"): # single or double quote starts quotation
+ if t == quotelevel and not (commlevel or speciallevel):
+ # matching quote ends quotation
+ quotelevel = ''
+ elif not quotelevel and not (commlevel or speciallevel) and (t == '"' or t == "'"):
+ # single or double quote starts quotation
quotelevel=t
- elif not quotelevel and t=='[': # opening bracket outside a quote
- if t2 in SPECIALCOMMENTS and commlevel==0 and not speciallevel:
- speciallevel=True
- else:
- commlevel+=1
- elif not quotelevel and t==']': # closing bracket ioutside a quote
+ elif not quotelevel and t == '[':
+ # opening bracket outside a quote
+ if t2 in SPECIALCOMMENTS and commlevel == 0 and not speciallevel:
+ speciallevel = True
+ else:
+ commlevel += 1
+ elif not quotelevel and t == ']':
+ # closing bracket ioutside a quote
if speciallevel:
- speciallevel=False
+ speciallevel = False
else:
- commlevel-=1
- if commlevel<0:
+ commlevel -= 1
+ if commlevel < 0:
raise NexusError('Nexus formatting error: unmatched ]')
continue
- if commlevel==0: # copy if we're not in comment
- if t==';' and not quotelevel:
+ if commlevel == 0:
+ # copy if we're not in comment
+ if t == ';' and not quotelevel:
newtext.append(''.join(newline))
newline=[]
else:
@@ -443,7 +458,7 @@
#level of comments should be 0 at the end of the file
if newline:
newtext.append('\n'.join(newline))
- if commlevel>0:
+ if commlevel > 0:
raise NexusError('Nexus formatting error: unmatched [')
return newtext
@@ -455,37 +470,35 @@
Lines are adjusted so that no linebreaks occur within a commandline
(except matrix command line)
"""
- formatted_lines=[]
+ formatted_lines = []
for l in lines:
#Convert line endings
- l=l.replace('\r\n','\n').replace('\r','\n').strip()
+ l = l.replace('\r\n', '\n').replace('\r', '\n').strip()
if l.lower().startswith('matrix'):
formatted_lines.append(l)
else:
- l=l.replace('\n',' ')
+ l = l.replace('\n', ' ')
if l:
formatted_lines.append(l)
return formatted_lines
-def _replace_parenthesized_ambigs(seq,rev_ambig_values):
+def _replace_parenthesized_ambigs(seq, rev_ambig_values):
"""Replaces ambigs in xxx(ACG)xxx format by IUPAC ambiguity code."""
- opening=seq.find('(')
- while opening>-1:
- closing=seq.find(')')
- if closing<0:
+ opening = seq.find('(')
+ while opening > -1:
+ closing = seq.find(')')
+ if closing < 0:
raise NexusError('Missing closing parenthesis in: '+seq)
- elif closing 0:
try:
options = options.replace('=', ' = ').split()
- valued_indices=[(n-1,n,n+1) for n in range(len(options)) if options[n]=='=' and n!=0 and n!=len((options))]
+ valued_indices = [(n-1, n, n+1) for n in range(len(options))
+ if options[n] == '=' and n != 0 and n != len((options))]
indices = []
for sl in valued_indices:
indices.extend(sl)
@@ -526,47 +541,47 @@
class Block(object):
"""Represent a NEXUS block with block name and list of commandlines."""
- def __init__(self,title=None):
- self.title=title
- self.commandlines=[]
+ def __init__(self, title=None):
+ self.title = title
+ self.commandlines = []
class Nexus(object):
def __init__(self, input=None):
- self.ntax=0 # number of taxa
- self.nchar=0 # number of characters
- self.unaltered_taxlabels=[] # taxlabels as the appear in the input file (incl. duplicates, etc.)
- self.taxlabels=[] # labels for taxa, ordered by their id
- self.charlabels=None # ... and for characters
- self.statelabels=None # ... and for states
- self.datatype='dna' # (standard), dna, rna, nucleotide, protein
- self.respectcase=False # case sensitivity
- self.missing='?' # symbol for missing characters
- self.gap='-' # symbol for gap
- self.symbols=None # set of symbols
- self.equate=None # set of symbol synonyms
- self.matchchar=None # matching char for matrix representation
- self.labels=None # left, right, no
- self.transpose=False # whether matrix is transposed
- self.interleave=False # whether matrix is interleaved
- self.tokens=False # unsupported
- self.eliminate=None # unsupported
- self.matrix=None # ...
- self.unknown_blocks=[] # blocks we don't care about
- self.taxsets={}
- self.charsets={}
- self.charpartitions={}
- self.taxpartitions={}
- self.trees=[] # list of Trees (instances of Tree class)
- self.translate=None # Dict to translate taxon <-> taxon numbers
- self.structured=[] # structured input representation
- self.set={} # dict of the set command to set various options
- self.options={} # dict of the options command in the data block
- self.codonposset=None # name of the charpartition that defines codon positions
+ self.ntax = 0 # number of taxa
+ self.nchar = 0 # number of characters
+ self.unaltered_taxlabels = [] # taxlabels as the appear in the input file (incl. duplicates, etc.)
+ self.taxlabels = [] # labels for taxa, ordered by their id
+ self.charlabels = None # ... and for characters
+ self.statelabels = None # ... and for states
+ self.datatype = 'dna' # (standard), dna, rna, nucleotide, protein
+ self.respectcase = False # case sensitivity
+ self.missing = '?' # symbol for missing characters
+ self.gap = '-' # symbol for gap
+ self.symbols = None # set of symbols
+ self.equate = None # set of symbol synonyms
+ self.matchchar = None # matching char for matrix representation
+ self.labels = None # left, right, no
+ self.transpose = False # whether matrix is transposed
+ self.interleave = False # whether matrix is interleaved
+ self.tokens = False # unsupported
+ self.eliminate = None # unsupported
+ self.matrix = None # ...
+ self.unknown_blocks = [] # blocks we don't care about
+ self.taxsets = {}
+ self.charsets = {}
+ self.charpartitions = {}
+ self.taxpartitions = {}
+ self.trees = [] # list of Trees (instances of Tree class)
+ self.translate = None # Dict to translate taxon <-> taxon numbers
+ self.structured = [] # structured input representation
+ self.set = {} # dict of the set command to set various options
+ self.options = {} # dict of the options command in the data block
+ self.codonposset = None # name of the charpartition that defines codon positions
# some defaults
- self.options['gapmode']='missing'
+ self.options['gapmode'] = 'missing'
if input:
self.read(input)
@@ -577,13 +592,13 @@
"""Included for backwards compatibility (DEPRECATED)."""
return self.taxlabels
- def set_original_taxon_order(self,value):
+ def set_original_taxon_order(self, value):
"""Included for backwards compatibility (DEPRECATED)."""
- self.taxlabels=value
+ self.taxlabels = value
- original_taxon_order=property(get_original_taxon_order,set_original_taxon_order)
+ original_taxon_order = property(get_original_taxon_order, set_original_taxon_order)
- def read(self,input):
+ def read(self, input):
"""Read and parse NEXUS input (a filename, file-handle, or string)."""
# 1. Assume we have the name of a file in the execution dir or a
@@ -593,30 +608,30 @@
with File.as_handle(input, 'rU') as fp:
file_contents = fp.read()
self.filename = getattr(fp, 'name', 'Unknown_nexus_file')
- except (TypeError,IOError,AttributeError):
+ except (TypeError, IOError, AttributeError):
#2 Assume we have a string from a fh.read()
if isinstance(input, basestring):
file_contents = input
- self.filename='input_string'
+ self.filename = 'input_string'
else:
- print input.strip()[:50]
+ print(input.strip()[:50])
raise NexusError('Unrecognized input: %s ...' % input[:100])
- file_contents=file_contents.strip()
+ file_contents = file_contents.strip()
if file_contents.startswith('#NEXUS'):
- file_contents=file_contents[6:]
- commandlines=_get_command_lines(file_contents)
+ file_contents = file_contents[6:]
+ commandlines = _get_command_lines(file_contents)
# get rid of stupid 'NEXUS token - in merged treefiles, this might appear multiple times'
- for i,cl in enumerate(commandlines):
+ for i, cl in enumerate(commandlines):
try:
- if cl[:6].upper()=='#NEXUS':
- commandlines[i]=cl[6:].strip()
+ if cl[:6].upper() == '#NEXUS':
+ commandlines[i] = cl[6:].strip()
except:
pass
# now loop through blocks (we parse only data in known blocks, thus ignoring non-block commands
nexus_block_gen = self._get_nexus_block(commandlines)
- while 1:
+ while True:
try:
- title, contents = nexus_block_gen.next()
+ title, contents = next(nexus_block_gen)
except StopIteration:
break
if title in KNOWN_NEXUS_BLOCKS:
@@ -624,150 +639,150 @@
else:
self._unknown_nexus_block(title, contents)
- def _get_nexus_block(self,file_contents):
+ def _get_nexus_block(self, file_contents):
"""Generator for looping through Nexus blocks."""
- inblock=False
- blocklines=[]
+ inblock = False
+ blocklines = []
while file_contents:
- cl=file_contents.pop(0)
+ cl = file_contents.pop(0)
if cl.lower().startswith('begin'):
if not inblock:
- inblock=True
- title=cl.split()[1].lower()
+ inblock = True
+ title = cl.split()[1].lower()
else:
raise NexusError('Illegal block nesting in block %s' % title)
elif cl.lower().startswith('end'):
if inblock:
- inblock=False
- yield title,blocklines
- blocklines=[]
+ inblock = False
+ yield title, blocklines
+ blocklines = []
else:
raise NexusError('Unmatched \'end\'.')
elif inblock:
blocklines.append(cl)
- def _unknown_nexus_block(self,title, contents):
+ def _unknown_nexus_block(self, title, contents):
block = Block()
block.commandlines.append(contents)
block.title = title
self.unknown_blocks.append(block)
- def _parse_nexus_block(self,title, contents):
+ def _parse_nexus_block(self, title, contents):
"""Parse a known Nexus Block (PRIVATE)."""
# attached the structered block representation
self._apply_block_structure(title, contents)
#now check for taxa,characters,data blocks. If this stuff is defined more than once
#the later occurences will override the previous ones.
- block=self.structured[-1]
+ block = self.structured[-1]
for line in block.commandlines:
try:
- getattr(self,'_'+line.command)(line.options)
+ getattr(self, '_' + line.command)(line.options)
except AttributeError:
- raise
raise NexusError('Unknown command: %s ' % line.command)
- def _title(self,options):
+ def _title(self, options):
pass
def _link(self, options):
pass
- def _dimensions(self,options):
+ def _dimensions(self, options):
if 'ntax' in options:
- self.ntax=eval(options['ntax'])
+ self.ntax = eval(options['ntax'])
if 'nchar' in options:
- self.nchar=eval(options['nchar'])
+ self.nchar = eval(options['nchar'])
- def _format(self,options):
+ def _format(self, options):
# print options
# we first need to test respectcase, then symbols (which depends on respectcase)
# then datatype (which, if standard, depends on symbols and respectcase in order to generate
# dicts for ambiguous values and alphabet
if 'respectcase' in options:
- self.respectcase=True
+ self.respectcase = True
# adjust symbols to for respectcase
if 'symbols' in options:
- self.symbols=options['symbols']
+ self.symbols = options['symbols']
if (self.symbols.startswith('"') and self.symbols.endswith('"')) or\
(self.symbold.startswith("'") and self.symbols.endswith("'")):
- self.symbols=self.symbols[1:-1].replace(' ','')
+ self.symbols = self.symbols[1:-1].replace(' ', '')
if not self.respectcase:
- self.symbols=self.symbols.lower()+self.symbols.upper()
- self.symbols=list(set(self.symbols))
+ self.symbols = self.symbols.lower() + self.symbols.upper()
+ self.symbols = list(set(self.symbols))
if 'datatype' in options:
- self.datatype=options['datatype'].lower()
- if self.datatype=='dna' or self.datatype=='nucleotide':
- self.alphabet=copy.deepcopy(IUPAC.ambiguous_dna)
- self.ambiguous_values=copy.deepcopy(IUPACData.ambiguous_dna_values)
- self.unambiguous_letters=copy.deepcopy(IUPACData.unambiguous_dna_letters)
- elif self.datatype=='rna':
- self.alphabet=copy.deepcopy(IUPAC.ambiguous_rna)
- self.ambiguous_values=copy.deepcopy(IUPACData.ambiguous_rna_values)
- self.unambiguous_letters=copy.deepcopy(IUPACData.unambiguous_rna_letters)
- elif self.datatype=='protein':
- self.alphabet=copy.deepcopy(IUPAC.protein)
- self.ambiguous_values={'B':'DN','Z':'EQ','X':copy.deepcopy(IUPACData.protein_letters)} # that's how PAUP handles it
- self.unambiguous_letters=copy.deepcopy(IUPACData.protein_letters)+'*' # stop-codon
- elif self.datatype=='standard':
+ self.datatype = options['datatype'].lower()
+ if self.datatype == 'dna' or self.datatype == 'nucleotide':
+ self.alphabet = IUPAC.IUPACAmbiguousDNA() # fresh instance!
+ self.ambiguous_values = IUPACData.ambiguous_dna_values.copy()
+ self.unambiguous_letters = IUPACData.unambiguous_dna_letters
+ elif self.datatype == 'rna':
+ self.alphabet = IUPAC.IUPACAmbiguousDNA() # fresh instance!
+ self.ambiguous_values = IUPACData.ambiguous_rna_values.copy()
+ self.unambiguous_letters = IUPACData.unambiguous_rna_letters
+ elif self.datatype == 'protein':
+ #TODO - Should this not be ExtendedIUPACProtein?
+ self.alphabet = IUPAC.IUPACProtein() # fresh instance
+ self.ambiguous_values = {'B':'DN', 'Z':'EQ', 'X':IUPACData.protein_letters}
+ # that's how PAUP handles it
+ self.unambiguous_letters = IUPACData.protein_letters + '*' # stop-codon
+ elif self.datatype == 'standard':
raise NexusError('Datatype standard is not yet supported.')
- #self.alphabet=None
- #self.ambiguous_values={}
+ #self.alphabet = None
+ #self.ambiguous_values = {}
#if not self.symbols:
- # self.symbols='01' # if nothing else defined, then 0 and 1 are the default states
- #self.unambiguous_letters=self.symbols
+ # self.symbols = '01' # if nothing else defined, then 0 and 1 are the default states
+ #self.unambiguous_letters = self.symbols
else:
- raise NexusError('Unsupported datatype: '+self.datatype)
- self.valid_characters=''.join(self.ambiguous_values)+self.unambiguous_letters
+ raise NexusError('Unsupported datatype: ' + self.datatype)
+ self.valid_characters = ''.join(self.ambiguous_values) + self.unambiguous_letters
if not self.respectcase:
- self.valid_characters=self.valid_characters.lower()+self.valid_characters.upper()
+ self.valid_characters = self.valid_characters.lower() + self.valid_characters.upper()
#we have to sort the reverse ambig coding dict key characters:
#to be sure that it's 'ACGT':'N' and not 'GTCA':'N'
- rev=dict((i[1],i[0]) for i in self.ambiguous_values.iteritems() if i[0]!='X')
- self.rev_ambiguous_values={}
- for (k,v) in rev.iteritems():
- key=[c for c in k]
- key.sort()
- self.rev_ambiguous_values[''.join(key)]=v
+ rev=dict((i[1], i[0]) for i in self.ambiguous_values.items() if i[0]!='X')
+ self.rev_ambiguous_values = {}
+ for (k, v) in rev.items():
+ key = sorted(c for c in k)
+ self.rev_ambiguous_values[''.join(key)] = v
#overwrite symbols for datype rna,dna,nucleotide
- if self.datatype in ['dna','rna','nucleotide']:
- self.symbols=self.alphabet.letters
+ if self.datatype in ['dna', 'rna', 'nucleotide']:
+ self.symbols = self.alphabet.letters
if self.missing not in self.ambiguous_values:
- self.ambiguous_values[self.missing]=self.unambiguous_letters+self.gap
- self.ambiguous_values[self.gap]=self.gap
- elif self.datatype=='standard':
+ self.ambiguous_values[self.missing] = self.unambiguous_letters+self.gap
+ self.ambiguous_values[self.gap] = self.gap
+ elif self.datatype == 'standard':
if not self.symbols:
- self.symbols=['1','0']
+ self.symbols = ['1', '0']
if 'missing' in options:
- self.missing=options['missing'][0]
+ self.missing = options['missing'][0]
if 'gap' in options:
- self.gap=options['gap'][0]
+ self.gap = options['gap'][0]
if 'equate' in options:
- self.equate=options['equate']
+ self.equate = options['equate']
if 'matchchar' in options:
- self.matchchar=options['matchchar'][0]
+ self.matchchar = options['matchchar'][0]
if 'labels' in options:
- self.labels=options['labels']
+ self.labels = options['labels']
if 'transpose' in options:
raise NexusError('TRANSPOSE is not supported!')
- self.transpose=True
+ self.transpose = True
if 'interleave' in options:
- if options['interleave'] is None or options['interleave'].lower()=='yes':
- self.interleave=True
+ if options['interleave'] is None or options['interleave'].lower() == 'yes':
+ self.interleave = True
if 'tokens' in options:
- self.tokens=True
+ self.tokens = True
if 'notokens' in options:
- self.tokens=False
+ self.tokens = False
- def _set(self,options):
- self.set=options
+ def _set(self, options):
+ self.set = options
- def _options(self,options):
- self.options=options
+ def _options(self, options):
+ self.options = options
- def _eliminate(self,options):
- self.eliminate=options
+ def _eliminate(self, options):
+ self.eliminate = options
- def _taxlabels(self,options):
+ def _taxlabels(self, options):
"""Get taxon labels (PRIVATE).
As the taxon names are already in the matrix, this is superfluous
@@ -776,358 +791,357 @@
taxon names easier.
"""
pass
- #self.taxlabels=[]
- #opts=CharBuffer(options)
+ #self.taxlabels = []
+ #opts = CharBuffer(options)
#while True:
- # taxon=quotestrip(opts.next_word())
+ # taxon = quotestrip(opts.next_word())
# if not taxon:
# break
# self.taxlabels.append(taxon)
- def _check_taxlabels(self,taxon):
+ def _check_taxlabels(self, taxon):
"""Check for presence of taxon in self.taxlabels."""
# According to NEXUS standard, underscores shall be treated as spaces...,
# so checking for identity is more difficult
- nextaxa=dict([(t.replace(' ','_'),t) for t in self.taxlabels])
- nexid=taxon.replace(' ','_')
+ nextaxa = dict((t.replace(' ', '_'), t) for t in self.taxlabels)
+ nexid = taxon.replace(' ', '_')
return nextaxa.get(nexid)
- def _charlabels(self,options):
- self.charlabels={}
- opts=CharBuffer(options)
+ def _charlabels(self, options):
+ self.charlabels = {}
+ opts = CharBuffer(options)
while True:
- try:
- # get id and state
- w=opts.next_word()
- if w is None: # McClade saves and reads charlabel-lists with terminal comma?!
- break
- identifier=self._resolve(w,set_type=CHARSET)
- state=quotestrip(opts.next_word())
- self.charlabels[identifier]=state
- # check for comma or end of command
- c=opts.next_nonwhitespace()
- if c is None:
- break
- elif c!=',':
- raise NexusError('Missing \',\' in line %s.' % options)
- except NexusError:
- raise
- except:
- raise NexusError('Format error in line %s.' % options)
+ # get id and state
+ w = opts.next_word()
+ if w is None: # McClade saves and reads charlabel-lists with terminal comma?!
+ break
+ identifier = self._resolve(w, set_type=CHARSET)
+ state = quotestrip(opts.next_word())
+ self.charlabels[identifier] = state
+ # check for comma or end of command
+ c = opts.next_nonwhitespace()
+ if c is None:
+ break
+ elif c != ',':
+ raise NexusError('Missing \',\' in line %s.' % options)
- def _charstatelabels(self,options):
+ def _charstatelabels(self, options):
# warning: charstatelabels supports only charlabels-syntax!
self._charlabels(options)
- def _statelabels(self,options):
- #self.charlabels=options
+ def _statelabels(self, options):
+ #self.charlabels = options
#print 'Command statelabels is not supported and will be ignored.'
pass
- def _matrix(self,options):
+ def _matrix(self, options):
if not self.ntax or not self.nchar:
raise NexusError('Dimensions must be specified before matrix!')
- self.matrix={}
- taxcount=0
- first_matrix_block=True
+ self.matrix = {}
+ taxcount = 0
+ first_matrix_block = True
#eliminate empty lines and leading/trailing whitespace
- lines=[l.strip() for l in options.split('\n') if l.strip()!='']
- lineiter=iter(lines)
- while 1:
+ lines = [l.strip() for l in options.split('\n') if l.strip() != '']
+ lineiter = iter(lines)
+ while True:
try:
- l=lineiter.next()
+ l = next(lineiter)
except StopIteration:
- if taxcountself.ntax:
+ elif taxcount > self.ntax:
raise NexusError('Too many taxa in matrix.')
else:
break
# count the taxa and check for interleaved matrix
- taxcount+=1
+ taxcount += 1
##print taxcount
- if taxcount>self.ntax:
+ if taxcount > self.ntax:
if not self.interleave:
raise NexusError('Too many taxa in matrix - should matrix be interleaved?')
else:
- taxcount=1
- first_matrix_block=False
+ taxcount = 1
+ first_matrix_block = False
#get taxon name and sequence
- linechars=CharBuffer(l)
- id=quotestrip(linechars.next_word())
- l=linechars.rest().strip()
- chars=''
+ linechars = CharBuffer(l)
+ id = quotestrip(linechars.next_word())
+ l = linechars.rest().strip()
+ chars = ''
if self.interleave:
#interleaved matrix
#print 'In interleave'
if l:
- chars=''.join(l.split())
+ chars = ''.join(l.split())
else:
- chars=''.join(lineiter.next().split())
+ chars = ''.join(next(lineiter).split())
else:
#non-interleaved matrix
- chars=''.join(l.split())
+ chars = ''.join(l.split())
while len(chars)1:
+ codonname = [n for n in self.charpartitions if n not in prev_partitions]
+ if codonname == [] or len(codonname) > 1:
raise NexusError('Formatting Error in codonposset: %s ' % options)
else:
- self.codonposset=codonname[0]
+ self.codonposset = codonname[0]
- def _codeset(self,options):
+ def _codeset(self, options):
pass
def _charpartition(self, options):
- charpartition={}
- quotelevel=False
- opts=CharBuffer(options)
- name=self._name_n_vector(opts)
+ charpartition = {}
+ quotelevel = False
+ opts = CharBuffer(options)
+ name = self._name_n_vector(opts)
if not name:
raise NexusError('Formatting error in charpartition: %s ' % options)
# now collect thesubbpartitions and parse them
# subpartitons separated by commas - which unfortunately could be part of a quoted identifier...
- sub=''
+ sub = ''
while True:
- w=opts.next()
- if w is None or (w==',' and not quotelevel):
- subname,subindices=self._get_indices(sub,set_type=CHARSET,separator=':')
- charpartition[subname]=_make_unique(subindices)
- sub=''
+ w = next(opts)
+ if w is None or (w == ',' and not quotelevel):
+ subname, subindices = self._get_indices(sub, set_type=CHARSET, separator=':')
+ charpartition[subname] = _make_unique(subindices)
+ sub = ''
if w is None:
break
else:
- if w=="'":
- quotelevel=not quotelevel
- sub+=w
+ if w == "'":
+ quotelevel = not quotelevel
+ sub += w
self.charpartitions[name]=charpartition
- def _get_indices(self,options,set_type=CHARSET,separator='='):
+ def _get_indices(self, options, set_type=CHARSET, separator='='):
"""Parse the taxset/charset specification (PRIVATE).
e.g. '1 2 3 - 5 dog cat 10 - 20 \\ 3'
--> [0,1,2,3,4,'dog','cat',9,12,15,18]
"""
- opts=CharBuffer(options)
- name=self._name_n_vector(opts,separator=separator)
- indices=self._parse_list(opts,set_type=set_type)
+ opts = CharBuffer(options)
+ name = self._name_n_vector(opts, separator=separator)
+ indices = self._parse_list(opts, set_type=set_type)
if indices is None:
raise NexusError('Formatting error in line: %s ' % options)
- return name,indices
+ return name, indices
- def _name_n_vector(self,opts,separator='='):
+ def _name_n_vector(self, opts, separator='='):
"""Extract name and check that it's not in vector format."""
- rest=opts.rest()
- name=opts.next_word()
+ rest = opts.rest()
+ name = opts.next_word()
# we ignore * before names
- if name=='*':
- name=opts.next_word()
+ if name == '*':
+ name = opts.next_word()
if not name:
raise NexusError('Formatting error in line: %s ' % rest)
- name=quotestrip(name)
- if opts.peek_nonwhitespace=='(':
- open=opts.next_nonwhitespace()
- qualifier=open.next_word()
- close=opts.next_nonwhitespace()
- if qualifier.lower()=='vector':
+ name = quotestrip(name)
+ if opts.peek_nonwhitespace == '(':
+ open = opts.next_nonwhitespace()
+ qualifier = open.next_word()
+ close = opts.next_nonwhitespace()
+ if qualifier.lower() == 'vector':
raise NexusError('Unsupported VECTOR format in line %s'
% (opts))
- elif qualifier.lower()!='standard':
+ elif qualifier.lower() != 'standard':
raise NexusError('Unknown qualifier %s in line %s'
% (qualifier, opts))
- if opts.next_nonwhitespace()!=separator:
+ if opts.next_nonwhitespace() != separator:
raise NexusError('Formatting error in line: %s ' % rest)
return name
- def _parse_list(self,options_buffer,set_type):
+ def _parse_list(self, options_buffer, set_type):
"""Parse a NEXUS list (PRIVATE).
e.g. [1, 2, 4-8\\2, dog, cat] --> [1,2,4,6,8,17,21],
(assuming dog is taxon no. 17 and cat is taxon no. 21).
"""
- plain_list=[]
+ plain_list = []
if options_buffer.peek_nonwhitespace():
- try: # capture all possible exceptions and treat them as formatting erros, if they are not NexusError
+ try:
+ # capture all possible exceptions and treat them as formatting
+ # errors, if they are not NexusError
while True:
- identifier=options_buffer.next_word() # next list element
- if not identifier: # end of list?
+ identifier = options_buffer.next_word() # next list element
+ if not identifier: # end of list?
break
- start=self._resolve(identifier,set_type=set_type)
- if options_buffer.peek_nonwhitespace()=='-': # followd by -
- end=start
- step=1
+ start = self._resolve(identifier, set_type=set_type)
+ if options_buffer.peek_nonwhitespace() == '-': # followd by -
+ end = start
+ step = 1
# get hyphen and end of range
- hyphen=options_buffer.next_nonwhitespace()
- end=self._resolve(options_buffer.next_word(),set_type=set_type)
- if set_type==CHARSET:
- if options_buffer.peek_nonwhitespace()=='\\': # followd by \
- backslash=options_buffer.next_nonwhitespace()
- step=int(options_buffer.next_word()) # get backslash and step
- plain_list.extend(range(start,end+1,step))
+ hyphen = options_buffer.next_nonwhitespace()
+ end = self._resolve(options_buffer.next_word(), set_type=set_type)
+ if set_type == CHARSET:
+ if options_buffer.peek_nonwhitespace() == '\\': # followd by \
+ backslash = options_buffer.next_nonwhitespace()
+ step = int(options_buffer.next_word()) # get backslash and step
+ plain_list.extend(range(start, end+1, step))
else:
- if type(start)==list or type(end)==list:
+ if isinstance(start, list) or isinstance(end, list):
raise NexusError('Name if character sets not allowed in range definition: %s'
% identifier)
- start=self.taxlabels.index(start)
- end=self.taxlabels.index(end)
- taxrange=self.taxlabels[start:end+1]
+ start = self.taxlabels.index(start)
+ end = self.taxlabels.index(end)
+ taxrange = self.taxlabels[start:end+1]
plain_list.extend(taxrange)
else:
- if type(start)==list: # start was the name of charset or taxset
+ if isinstance(start, list): # start was the name of charset or taxset
plain_list.extend(start)
else: # start was an ordinary identifier
plain_list.append(start)
@@ -1137,7 +1151,7 @@
return None
return plain_list
- def _resolve(self,identifier,set_type=None):
+ def _resolve(self, identifier, set_type=None):
"""Translate identifier in list into character/taxon index.
Characters (which are referred to by their index in Nexus.py):
@@ -1150,16 +1164,16 @@
Names are returned unchanged (if plain taxon identifiers), or the names in
the corresponding taxon set is returned.
"""
- identifier=quotestrip(identifier)
+ identifier = quotestrip(identifier)
if not set_type:
raise NexusError('INTERNAL ERROR: Need type to resolve identifier.')
- if set_type==CHARSET:
+ if set_type == CHARSET:
try:
- n=int(identifier)
+ n = int(identifier)
except ValueError:
- if self.charlabels and identifier in self.charlabels.itervalues():
+ if self.charlabels and identifier in self.charlabels.values():
for k in self.charlabels:
- if self.charlabels[k]==identifier:
+ if self.charlabels[k] == identifier:
return k
elif self.charsets and identifier in self.charsets:
return self.charsets[identifier]
@@ -1167,16 +1181,16 @@
raise NexusError('Unknown character identifier: %s'
% identifier)
else:
- if n<=self.nchar:
+ if n <= self.nchar:
return n-1
else:
raise NexusError('Illegal character identifier: %d>nchar (=%d).'
- % (identifier,self.nchar))
- elif set_type==TAXSET:
+ % (identifier, self.nchar))
+ elif set_type == TAXSET:
try:
- n=int(identifier)
+ n = int(identifier)
except ValueError:
- taxlabels_id=self._check_taxlabels(identifier)
+ taxlabels_id = self._check_taxlabels(identifier)
if taxlabels_id:
return taxlabels_id
elif self.taxsets and identifier in self.taxsets:
@@ -1185,11 +1199,11 @@
raise NexusError('Unknown taxon identifier: %s'
% identifier)
else:
- if n>0 and n<=self.ntax:
+ if n > 0 and n <= self.ntax:
return self.taxlabels[n-1]
else:
raise NexusError('Illegal taxon identifier: %d>ntax (=%d).'
- % (identifier,self.ntax))
+ % (identifier, self.ntax))
else:
raise NexusError('Unknown set specification: %s.'% set_type)
@@ -1209,8 +1223,9 @@
#Not implemented
pass
- def write_nexus_data_partitions(self, matrix=None, filename=None, blocksize=None, interleave=False,
- exclude=[], delete=[], charpartition=None, comment='',mrbayes=False):
+ def write_nexus_data_partitions(self, matrix=None, filename=None, blocksize=None,
+ interleave=False, exclude=[], delete=[],
+ charpartition=None, comment='', mrbayes=False):
"""Writes a nexus file for each partition in charpartition.
Only non-excluded characters and non-deleted taxa are included,
@@ -1218,39 +1233,39 @@
"""
if not matrix:
- matrix=self.matrix
+ matrix = self.matrix
if not matrix:
return
if not filename:
- filename=self.filename
+ filename = self.filename
if charpartition:
- pfilenames={}
+ pfilenames = {}
for p in charpartition:
- total_exclude=[]+exclude
- total_exclude.extend([c for c in range(self.nchar) if c not in charpartition[p]])
- total_exclude=_make_unique(total_exclude)
- pcomment=comment+'\nPartition: '+p+'\n'
- dot=filename.rfind('.')
- if dot>0:
- pfilename=filename[:dot]+'_'+p+'.data'
+ total_exclude = [] + exclude
+ total_exclude.extend(c for c in range(self.nchar) if c not in charpartition[p])
+ total_exclude = _make_unique(total_exclude)
+ pcomment = comment + '\nPartition: ' + p + '\n'
+ dot = filename.rfind('.')
+ if dot > 0:
+ pfilename = filename[:dot] + '_' + p + '.data'
else:
- pfilename=filename+'_'+p
- pfilenames[p]=pfilename
- self.write_nexus_data(filename=pfilename,matrix=matrix,blocksize=blocksize,
- interleave=interleave,exclude=total_exclude,delete=delete,comment=pcomment,append_sets=False,
- mrbayes=mrbayes)
+ pfilename = filename+'_'+p
+ pfilenames[p] = pfilename
+ self.write_nexus_data(filename=pfilename, matrix=matrix, blocksize=blocksize,
+ interleave=interleave, exclude=total_exclude, delete=delete,
+ comment=pcomment, append_sets=False, mrbayes=mrbayes)
return pfilenames
else:
fn=self.filename+'.data'
- self.write_nexus_data(filename=fn,matrix=matrix,blocksize=blocksize,interleave=interleave,
- exclude=exclude,delete=delete,comment=comment,append_sets=False,
- mrbayes=mrbayes)
+ self.write_nexus_data(filename=fn, matrix=matrix, blocksize=blocksize,
+ interleave=interleave, exclude=exclude, delete=delete,
+ comment=comment, append_sets=False, mrbayes=mrbayes)
return fn
def write_nexus_data(self, filename=None, matrix=None, exclude=[], delete=[],
- blocksize=None, interleave=False, interleave_by_partition=False,
- comment=None,omit_NEXUS=False,append_sets=True,mrbayes=False,
- codons_block=True):
+ blocksize=None, interleave=False, interleave_by_partition=False,
+ comment=None, omit_NEXUS=False, append_sets=True, mrbayes=False,
+ codons_block=True):
"""Writes a nexus file with data and sets block to a file or handle.
Character sets and partitions are appended by default, and are
@@ -1268,11 +1283,11 @@
Returns the filename/handle used to write the data.
"""
if not matrix:
- matrix=self.matrix
+ matrix = self.matrix
if not matrix:
return
if not filename:
- filename=self.filename
+ filename = self.filename
if [t for t in delete if not self._check_taxlabels(t)]:
raise NexusError('Unknown taxa: %s'
% ', '.join(set(delete).difference(set(self.taxlabels))))
@@ -1280,177 +1295,181 @@
if not interleave_by_partition in self.charpartitions:
raise NexusError('Unknown partition: %r' % interleave_by_partition)
else:
- partition=self.charpartitions[interleave_by_partition]
+ partition = self.charpartitions[interleave_by_partition]
# we need to sort the partition names by starting position before we exclude characters
- names=_sort_keys_by_values(partition)
- newpartition={}
+ names = _sort_keys_by_values(partition)
+ newpartition = {}
for p in partition:
- newpartition[p]=[c for c in partition[p] if c not in exclude]
+ newpartition[p] = [c for c in partition[p] if c not in exclude]
# how many taxa and how many characters are left?
- undelete=[taxon for taxon in self.taxlabels if taxon in matrix and taxon not in delete]
- cropped_matrix=_seqmatrix2strmatrix(self.crop_matrix(matrix,exclude=exclude,delete=delete))
- ntax_adjusted=len(undelete)
- nchar_adjusted=len(cropped_matrix[undelete[0]])
- if not undelete or (undelete and undelete[0]==''):
+ undelete = [taxon for taxon in self.taxlabels if taxon in matrix and taxon not in delete]
+ cropped_matrix = _seqmatrix2strmatrix(self.crop_matrix(matrix, exclude=exclude, delete=delete))
+ ntax_adjusted = len(undelete)
+ nchar_adjusted = len(cropped_matrix[undelete[0]])
+ if not undelete or (undelete and undelete[0] == ''):
return
with File.as_handle(filename, mode='w') as fh:
if not omit_NEXUS:
fh.write('#NEXUS\n')
if comment:
- fh.write('['+comment+']\n')
+ fh.write('[' + comment + ']\n')
fh.write('begin data;\n')
fh.write('\tdimensions ntax=%d nchar=%d;\n' % (ntax_adjusted, nchar_adjusted))
- fh.write('\tformat datatype='+self.datatype)
+ fh.write('\tformat datatype=' + self.datatype)
if self.respectcase:
fh.write(' respectcase')
if self.missing:
- fh.write(' missing='+self.missing)
+ fh.write(' missing=' + self.missing)
if self.gap:
- fh.write(' gap='+self.gap)
+ fh.write(' gap=' + self.gap)
if self.matchchar:
- fh.write(' matchchar='+self.matchchar)
+ fh.write(' matchchar=' + self.matchchar)
if self.labels:
- fh.write(' labels='+self.labels)
+ fh.write(' labels=' + self.labels)
if self.equate:
- fh.write(' equate='+self.equate)
+ fh.write(' equate=' + self.equate)
if interleave or interleave_by_partition:
fh.write(' interleave')
fh.write(';\n')
#if self.taxlabels:
# fh.write('taxlabels '+' '.join(self.taxlabels)+';\n')
if self.charlabels:
- newcharlabels=self._adjust_charlabels(exclude=exclude)
- clkeys=sorted(newcharlabels)
- fh.write('charlabels '+', '.join(["%s %s" % (k+1,safename(newcharlabels[k])) for k in clkeys])+';\n')
+ newcharlabels = self._adjust_charlabels(exclude=exclude)
+ clkeys = sorted(newcharlabels)
+ fh.write('charlabels '
+ + ', '.join("%s %s" % (k+1, safename(newcharlabels[k])) for k in clkeys)
+ + ';\n')
fh.write('matrix\n')
if not blocksize:
if interleave:
- blocksize=70
+ blocksize = 70
else:
- blocksize=self.nchar
+ blocksize = self.nchar
# delete deleted taxa and ecxclude excluded characters...
- namelength=max([len(safename(t,mrbayes=mrbayes)) for t in undelete])
+ namelength = max(len(safename(t, mrbayes=mrbayes)) for t in undelete)
if interleave_by_partition:
# interleave by partitions, but adjust partitions with regard to excluded characters
- seek=0
+ seek = 0
for p in names:
- fh.write('[%s: %s]\n' % (interleave_by_partition,p))
- if len(newpartition[p])>0:
+ fh.write('[%s: %s]\n' % (interleave_by_partition, p))
+ if len(newpartition[p]) > 0:
for taxon in undelete:
- fh.write(safename(taxon,mrbayes=mrbayes).ljust(namelength+1))
+ fh.write(safename(taxon, mrbayes=mrbayes).ljust(namelength+1))
fh.write(cropped_matrix[taxon][seek:seek+len(newpartition[p])]+'\n')
fh.write('\n')
else:
fh.write('[empty]\n\n')
- seek+=len(newpartition[p])
+ seek += len(newpartition[p])
elif interleave:
- for seek in range(0,nchar_adjusted,blocksize):
+ for seek in range(0, nchar_adjusted, blocksize):
for taxon in undelete:
- fh.write(safename(taxon,mrbayes=mrbayes).ljust(namelength+1))
+ fh.write(safename(taxon, mrbayes=mrbayes).ljust(namelength+1))
fh.write(cropped_matrix[taxon][seek:seek+blocksize]+'\n')
fh.write('\n')
else:
for taxon in undelete:
if blocksize'+safename(taxon)+'\n')
- for i in range(0, len(str(self.matrix[taxon])), width):
- fh.write(str(self.matrix[taxon])[i:i+width] + '\n')
- fh.close()
+ filename = self.filename+'.fas'
+ with open(filename, 'w') as fh:
+ for taxon in self.taxlabels:
+ fh.write('>' + safename(taxon) + '\n')
+ for i in range(0, len(str(self.matrix[taxon])), width):
+ fh.write(str(self.matrix[taxon])[i:i+width] + '\n')
return filename
def export_phylip(self, filename=None):
@@ -1459,296 +1478,290 @@
Note that this writes a relaxed PHYLIP format file, where the names
are not truncated, nor checked for invalid characters."""
if not filename:
- if '.' in self.filename and self.filename.split('.')[-1].lower() in ['paup','nexus','nex','dat']:
- filename='.'.join(self.filename.split('.')[:-1])+'.phy'
+ if '.' in self.filename and self.filename.split('.')[-1].lower() in ['paup', 'nexus', 'nex', 'dat']:
+ filename = '.'.join(self.filename.split('.')[:-1])+'.phy'
else:
- filename=self.filename+'.phy'
- fh=open(filename,'w')
- fh.write('%d %d\n' % (self.ntax,self.nchar))
- for taxon in self.taxlabels:
- fh.write('%s %s\n' % (safename(taxon), str(self.matrix[taxon])))
- fh.close()
+ filename = self.filename+'.phy'
+ with open(filename, 'w') as fh:
+ fh.write('%d %d\n' % (self.ntax, self.nchar))
+ for taxon in self.taxlabels:
+ fh.write('%s %s\n' % (safename(taxon), str(self.matrix[taxon])))
return filename
- def constant(self,matrix=None,delete=[],exclude=[]):
+ def constant(self, matrix=None, delete=[], exclude=[]):
"""Return a list with all constant characters."""
if not matrix:
- matrix=self.matrix
- undelete=[t for t in self.taxlabels if t in matrix and t not in delete]
+ matrix = self.matrix
+ undelete = [t for t in self.taxlabels if t in matrix and t not in delete]
if not undelete:
return None
- elif len(undelete)==1:
+ elif len(undelete) == 1:
return [x for x in range(len(matrix[undelete[0]])) if x not in exclude]
# get the first sequence and expand all ambiguous values
- constant=[(x,self.ambiguous_values.get(n.upper(),n.upper())) for
- x,n in enumerate(str(matrix[undelete[0]])) if x not in exclude]
+ constant = [(x, self.ambiguous_values.get(n.upper(), n.upper())) for
+ x, n in enumerate(str(matrix[undelete[0]])) if x not in exclude]
for taxon in undelete[1:]:
- newconstant=[]
+ newconstant = []
for site in constant:
#print '%d (paup=%d)' % (site[0],site[0]+1),
- seqsite=matrix[taxon][site[0]].upper()
+ seqsite = matrix[taxon][site[0]].upper()
#print seqsite,'checked against',site[1],'\t',
- if seqsite==self.missing or (seqsite==self.gap and self.options['gapmode'].lower()=='missing') or seqsite==site[1]:
+ if seqsite == self.missing \
+ or (seqsite == self.gap and self.options['gapmode'].lower() == 'missing') \
+ or seqsite == site[1]:
# missing or same as before -> ok
newconstant.append(site)
- elif seqsite in site[1] or site[1]==self.missing or (self.options['gapmode'].lower()=='missing' and site[1]==self.gap):
+ elif seqsite in site[1] \
+ or site[1] == self.missing \
+ or (self.options['gapmode'].lower() == 'missing' and site[1] == self.gap):
# subset of an ambig or only missing in previous -> take subset
- newconstant.append((site[0],self.ambiguous_values.get(seqsite,seqsite)))
- elif seqsite in self.ambiguous_values: # is it an ambig: check the intersection with prev. values
+ newconstant.append((site[0], self.ambiguous_values.get(seqsite, seqsite)))
+ elif seqsite in self.ambiguous_values:
+ # is it an ambig: check the intersection with prev. values
intersect = set(self.ambiguous_values[seqsite]).intersection(set(site[1]))
if intersect:
- newconstant.append((site[0],''.join(intersect)))
+ newconstant.append((site[0], ''.join(intersect)))
# print 'ok'
#else:
# print 'failed'
#else:
# print 'failed'
- constant=newconstant
- cpos=[s[0] for s in constant]
+ constant = newconstant
+ cpos = [s[0] for s in constant]
return cpos
- def cstatus(self,site,delete=[],narrow=True):
+ def cstatus(self, site, delete=[], narrow=True):
"""Summarize character.
narrow=True: paup-mode (a c ? --> ac; ? ? ? --> ?)
narrow=false: (a c ? --> a c g t -; ? ? ? --> a c g t -)
"""
- undelete=[t for t in self.taxlabels if t not in delete]
+ undelete = [t for t in self.taxlabels if t not in delete]
if not undelete:
return None
- cstatus=[]
+ cstatus = []
for t in undelete:
- c=self.matrix[t][site].upper()
- if self.options.get('gapmode')=='missing' and c==self.gap:
- c=self.missing
- if narrow and c==self.missing:
+ c = self.matrix[t][site].upper()
+ if self.options.get('gapmode') == 'missing' and c == self.gap:
+ c = self.missing
+ if narrow and c == self.missing:
if c not in cstatus:
cstatus.append(c)
else:
- cstatus.extend([b for b in self.ambiguous_values[c] if b not in cstatus])
+ cstatus.extend(b for b in self.ambiguous_values[c] if b not in cstatus)
if self.missing in cstatus and narrow and len(cstatus)>1:
- cstatus=[c for c in cstatus if c!=self.missing]
+ cstatus = [c for c in cstatus if c != self.missing]
cstatus.sort()
return cstatus
- def weighted_stepmatrix(self,name='your_name_here',exclude=[],delete=[]):
+ def weighted_stepmatrix(self, name='your_name_here', exclude=[], delete=[]):
"""Calculates a stepmatrix for weighted parsimony.
See Wheeler (1990), Cladistics 6:269-275 and
Felsenstein (1981), Biol. J. Linn. Soc. 16:183-196
"""
- m=StepMatrix(self.unambiguous_letters,self.gap)
+ m = StepMatrix(self.unambiguous_letters, self.gap)
for site in [s for s in range(self.nchar) if s not in exclude]:
- cstatus=self.cstatus(site,delete)
- for i,b1 in enumerate(cstatus[:-1]):
+ cstatus = self.cstatus(site, delete)
+ for i, b1 in enumerate(cstatus[:-1]):
for b2 in cstatus[i+1:]:
- m.add(b1.upper(),b2.upper(),1)
+ m.add(b1.upper(), b2.upper(), 1)
return m.transformation().weighting().smprint(name=name)
- def crop_matrix(self,matrix=None, delete=[], exclude=[]):
+ def crop_matrix(self, matrix=None, delete=[], exclude=[]):
"""Return a matrix without deleted taxa and excluded characters."""
if not matrix:
- matrix=self.matrix
+ matrix = self.matrix
if [t for t in delete if not self._check_taxlabels(t)]:
raise NexusError('Unknown taxa: %s'
% ', '.join(set(delete).difference(self.taxlabels)))
- if exclude!=[]:
- undelete=[t for t in self.taxlabels if t in matrix and t not in delete]
+ if exclude != []:
+ undelete = [t for t in self.taxlabels if t in matrix and t not in delete]
if not undelete:
return {}
- m=[str(matrix[k]) for k in undelete]
- zipped_m=zip(*m)
- sitesm=[s for i,s in enumerate(zipped_m) if i not in exclude]
- if sitesm==[]:
- return dict([(t,Seq('',self.alphabet)) for t in undelete])
- else:
- zipped_sitesm=zip(*sitesm)
- m=[Seq(s,self.alphabet) for s in map(''.join,zipped_sitesm)]
- return dict(zip(undelete,m))
+ m = [str(matrix[k]) for k in undelete]
+ sitesm = [s for i, s in enumerate(zip(*m)) if i not in exclude]
+ if sitesm == []:
+ return dict((t, Seq('', self.alphabet)) for t in undelete)
+ else:
+ m = [Seq(s, self.alphabet) for s in (''.join(x) for x in zip(*sitesm))]
+ return dict(zip(undelete, m))
else:
- return dict([(t,matrix[t]) for t in self.taxlabels if t in matrix and t not in delete])
+ return dict((t, matrix[t]) for t in self.taxlabels if t in matrix and t not in delete)
- def bootstrap(self,matrix=None,delete=[],exclude=[]):
+ def bootstrap(self, matrix=None, delete=[], exclude=[]):
"""Return a bootstrapped matrix."""
if not matrix:
- matrix=self.matrix
- seqobjects=isinstance(matrix[matrix.keys()[0]],Seq) # remember if Seq objects
- cm=self.crop_matrix(delete=delete,exclude=exclude) # crop data out
+ matrix = self.matrix
+ seqobjects = isinstance(matrix[list(matrix.keys())[0]], Seq) # remember if Seq objects
+ cm = self.crop_matrix(delete=delete, exclude=exclude) # crop data out
if not cm: # everything deleted?
return {}
- elif len(cm[cm.keys()[0]])==0: # everything excluded?
+ elif not cm[list(cm.keys())[0]]: # everything excluded?
return cm
- undelete=[t for t in self.taxlabels if t in cm]
+ undelete = [t for t in self.taxlabels if t in cm]
if seqobjects:
- sitesm=zip(*[str(cm[t]) for t in undelete])
- alphabet=matrix[matrix.keys()[0]].alphabet
+ sitesm = list(zip(*[str(cm[t]) for t in undelete]))
+ alphabet = matrix[list(matrix.keys())[0]].alphabet
else:
- sitesm=zip(*[cm[t] for t in undelete])
- bootstrapsitesm=[sitesm[random.randint(0,len(sitesm)-1)] for i in range(len(sitesm))]
- bootstrapseqs=map(''.join,zip(*bootstrapsitesm))
+ sitesm = list(zip(*[cm[t] for t in undelete]))
+ bootstrapsitesm = [sitesm[random.randint(0, len(sitesm)-1)] for i in range(len(sitesm))]
+ bootstrapseqs = [''.join(x) for x in zip(*bootstrapsitesm)]
if seqobjects:
- bootstrapseqs=[Seq(s,alphabet) for s in bootstrapseqs]
- return dict(zip(undelete,bootstrapseqs))
+ bootstrapseqs = [Seq(s, alphabet) for s in bootstrapseqs]
+ return dict(zip(undelete, bootstrapseqs))
- def add_sequence(self,name,sequence):
+ def add_sequence(self, name, sequence):
"""Adds a sequence (string) to the matrix."""
if not name:
raise NexusError('New sequence must have a name')
- diff=self.nchar-len(sequence)
- if diff<0:
- self.insert_gap(self.nchar,-diff)
- elif diff>0:
- sequence+=self.missing*diff
+ diff = self.nchar-len(sequence)
+ if diff < 0:
+ self.insert_gap(self.nchar, -diff)
+ elif diff > 0:
+ sequence += self.missing*diff
if name in self.taxlabels:
- unique_name=_unique_label(self.taxlabels,name)
+ unique_name = _unique_label(self.taxlabels, name)
#print "WARNING: Sequence name %s is already present. Sequence was added as %s." % (name,unique_name)
else:
- unique_name=name
+ unique_name = name
assert unique_name not in self.matrix, "ERROR. There is a discrepancy between taxlabels and matrix keys. Report this as a bug."
- self.matrix[unique_name]=Seq(sequence,self.alphabet)
- self.ntax+=1
+ self.matrix[unique_name] = Seq(sequence, self.alphabet)
+ self.ntax += 1
self.taxlabels.append(unique_name)
self.unaltered_taxlabels.append(name)
- def insert_gap(self,pos,n=1,leftgreedy=False):
+ def insert_gap(self, pos, n=1, leftgreedy=False):
"""Add a gap into the matrix and adjust charsets and partitions.
pos=0: first position
pos=nchar: last position
"""
- def _adjust(set,x,d,leftgreedy=False):
+ def _adjust(set, x, d, leftgreedy=False):
"""Adjusts character sets if gaps are inserted, taking care of
new gaps within a coherent character set."""
# if 3 gaps are inserted at pos. 9 in a set that looks like 1 2 3 8 9 10 11 13 14 15
# then the adjusted set will be 1 2 3 8 9 10 11 12 13 14 15 16 17 18
# but inserting into position 8 it will stay like 1 2 3 11 12 13 14 15 16 17 18
set.sort()
- addpos=0
- for i,c in enumerate(set):
- if c>=x:
- set[i]=c+d
+ addpos = 0
+ for i, c in enumerate(set):
+ if c >= x:
+ set[i] = c + d
# if we add gaps within a group of characters, we want the gap position included in this group
- if c==x:
+ if c == x:
if leftgreedy or (i>0 and set[i-1]==c-1):
- addpos=i
- if addpos>0:
- set[addpos:addpos]=range(x,x+d)
+ addpos = i
+ if addpos > 0:
+ set[addpos:addpos] = list(range(x, x+d))
return set
- if pos<0 or pos>self.nchar:
+ if pos < 0 or pos > self.nchar:
raise NexusError('Illegal gap position: %d' % pos)
- if n==0:
+ if n == 0:
return
- if self.taxlabels:
- #python 2.3 does not support zip(*[])
- sitesm=zip(*[str(self.matrix[t]) for t in self.taxlabels])
- else:
- sitesm=[]
- sitesm[pos:pos]=[['-']*len(self.taxlabels)]*n
- # #self.matrix=dict([(taxon,Seq(map(''.join,zip(*sitesm))[i],self.alphabet)) for\
- # i,taxon in enumerate(self.taxlabels)])
- zipped=zip(*sitesm)
- mapped=map(''.join,zipped)
- listed=[(taxon,Seq(mapped[i],self.alphabet)) for i,taxon in enumerate(self.taxlabels)]
- self.matrix=dict(listed)
- self.nchar+=n
+ sitesm = list(zip(*[str(self.matrix[t]) for t in self.taxlabels]))
+ sitesm[pos:pos] = [['-']*len(self.taxlabels)] * n
+ mapped = [''.join(x) for x in zip(*sitesm)]
+ listed = [(taxon, Seq(mapped[i], self.alphabet)) for i, taxon in enumerate(self.taxlabels)]
+ self.matrix = dict(listed)
+ self.nchar += n
# now adjust character sets
- for i,s in self.charsets.iteritems():
- self.charsets[i]=_adjust(s,pos,n,leftgreedy=leftgreedy)
+ for i, s in self.charsets.items():
+ self.charsets[i] = _adjust(s, pos, n, leftgreedy=leftgreedy)
for p in self.charpartitions:
- for sp,s in self.charpartitions[p].iteritems():
- self.charpartitions[p][sp]=_adjust(s,pos,n,leftgreedy=leftgreedy)
+ for sp, s in self.charpartitions[p].items():
+ self.charpartitions[p][sp] = _adjust(s, pos, n, leftgreedy=leftgreedy)
# now adjust character state labels
- self.charlabels=self._adjust_charlabels(insert=[pos]*n)
+ self.charlabels = self._adjust_charlabels(insert=[pos]*n)
return self.charlabels
- def _adjust_charlabels(self,exclude=None,insert=None):
+ def _adjust_charlabels(self, exclude=None, insert=None):
"""Return adjusted indices of self.charlabels if characters are excluded or inserted."""
if exclude and insert:
raise NexusError('Can\'t exclude and insert at the same time')
if not self.charlabels:
return None
- labels=sorted(self.charlabels)
- newcharlabels={}
+ labels = sorted(self.charlabels)
+ newcharlabels = {}
if exclude:
exclude.sort()
- exclude.append(sys.maxint)
- excount=0
+ exclude.append(sys.maxsize)
+ excount = 0
for c in labels:
if not c in exclude:
- while c>exclude[excount]:
- excount+=1
- newcharlabels[c-excount]=self.charlabels[c]
+ while c > exclude[excount]:
+ excount += 1
+ newcharlabels[c-excount] = self.charlabels[c]
elif insert:
insert.sort()
- insert.append(sys.maxint)
- icount=0
+ insert.append(sys.maxsize)
+ icount = 0
for c in labels:
- while c>=insert[icount]:
- icount+=1
- newcharlabels[c+icount]=self.charlabels[c]
+ while c >= insert[icount]:
+ icount += 1
+ newcharlabels[c+icount] = self.charlabels[c]
else:
return self.charlabels
return newcharlabels
- def invert(self,charlist):
+ def invert(self, charlist):
"""Returns all character indices that are not in charlist."""
return [c for c in range(self.nchar) if c not in charlist]
- def gaponly(self,include_missing=False):
+ def gaponly(self, include_missing=False):
"""Return gap-only sites."""
- gap=set(self.gap)
+ gap = set(self.gap)
if include_missing:
gap.add(self.missing)
- sitesm=zip(*[str(self.matrix[t]) for t in self.taxlabels])
- gaponly=[i for i,site in enumerate(sitesm) if set(site).issubset(gap)]
- return gaponly
+ sitesm = zip(*[str(self.matrix[t]) for t in self.taxlabels])
+ return [i for i, site in enumerate(sitesm) if set(site).issubset(gap)]
- def terminal_gap_to_missing(self,missing=None,skip_n=True):
+ def terminal_gap_to_missing(self, missing=None, skip_n=True):
"""Replaces all terminal gaps with missing character.
Mixtures like ???------??------- are properly resolved."""
if not missing:
- missing=self.missing
- replace=[self.missing,self.gap]
+ missing = self.missing
+ replace = [self.missing, self.gap]
if not skip_n:
- replace.extend(['n','N'])
+ replace.extend(['n', 'N'])
for taxon in self.taxlabels:
- sequence=str(self.matrix[taxon])
- length=len(sequence)
- start,end=get_start_end(sequence,skiplist=replace)
- if start==-1 and end==-1:
- sequence=missing*length
+ sequence = str(self.matrix[taxon])
+ length = len(sequence)
+ start, end = get_start_end(sequence, skiplist=replace)
+ if start == -1 and end == -1:
+ sequence = missing*length
else:
- sequence=sequence[:end+1]+missing*(length-end-1)
- sequence=start*missing+sequence[start:]
+ sequence = sequence[:end+1] + missing*(length-end-1)
+ sequence = start*missing + sequence[start:]
assert length==len(sequence), 'Illegal sequence manipulation in Nexus.terminal_gap_to_missing in taxon %s' % taxon
- self.matrix[taxon]=Seq(sequence,self.alphabet)
+ self.matrix[taxon] = Seq(sequence, self.alphabet)
try:
import cnexus
except ImportError:
def _get_command_lines(file_contents):
- lines=_kill_comments_and_break_lines(file_contents)
- commandlines=_adjust_lines(lines)
+ lines = _kill_comments_and_break_lines(file_contents)
+ commandlines = _adjust_lines(lines)
return commandlines
else:
def _get_command_lines(file_contents):
- decommented=cnexus.scanfile(file_contents)
+ decommented = cnexus.scanfile(file_contents)
#check for unmatched parentheses
- if decommented=='[' or decommented==']':
+ if decommented == '[' or decommented == ']':
raise NexusError('Unmatched %s' % decommented)
# cnexus can't return lists, so in analogy we separate
# commandlines with chr(7) (a character that shouldn't be part of a
# nexus file under normal circumstances)
- commandlines=_adjust_lines(decommented.split(chr(7)))
+ commandlines = _adjust_lines(decommented.split(chr(7)))
return commandlines
diff -Nru python-biopython-1.62/Bio/Nexus/Nodes.py python-biopython-1.63/Bio/Nexus/Nodes.py
--- python-biopython-1.62/Bio/Nexus/Nodes.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Nexus/Nodes.py 2013-12-05 14:10:43.000000000 +0000
@@ -38,7 +38,7 @@
def all_ids(self):
"""Return a list of all node ids."""
- return self.chain.keys()
+ return list(self.chain.keys())
def add(self,node,prev=None):
"""Attaches node to another."""
@@ -53,7 +53,7 @@
self.chain[id]=node
return id
- def collapse(self,id):
+ def collapse(self, id):
"""Deletes node from chain and relinks successors to predecessor."""
if id not in self.chain:
raise ChainException('Unknown ID: '+str(id))
@@ -67,14 +67,14 @@
self.kill(id)
return node
- def kill(self,id):
+ def kill(self, id):
"""Kills a node from chain without caring to what it is connected."""
if id not in self.chain:
raise ChainException('Unknown ID: '+str(id))
else:
del self.chain[id]
- def unlink(self,id):
+ def unlink(self, id):
"""Disconnects node from his predecessor."""
if id not in self.chain:
raise ChainException('Unknown ID: '+str(id))
@@ -85,7 +85,7 @@
self.chain[id].prev=None
return prev_id
- def link(self, parent,child):
+ def link(self, parent, child):
"""Connects son to parent."""
if child not in self.chain:
raise ChainException('Unknown ID: '+str(child))
@@ -96,26 +96,26 @@
self.chain[parent].succ.append(child)
self.chain[child].set_prev(parent)
- def is_parent_of(self,parent,grandchild):
+ def is_parent_of(self, parent, grandchild):
"""Check if grandchild is a subnode of parent."""
if grandchild==parent or grandchild in self.chain[parent].get_succ():
return True
else:
for sn in self.chain[parent].get_succ():
- if self.is_parent_of(sn,grandchild):
+ if self.is_parent_of(sn, grandchild):
return True
else:
return False
- def trace(self,start,finish):
+ def trace(self, start, finish):
"""Returns a list of all node_ids between two nodes (excluding start, including end)."""
if start not in self.chain or finish not in self.chain:
raise NodeException('Unknown node.')
- if not self.is_parent_of(start,finish) or start==finish:
+ if not self.is_parent_of(start, finish) or start==finish:
return []
for sn in self.chain[start].get_succ():
- if self.is_parent_of(sn,finish):
- return [sn]+self.trace(sn,finish)
+ if self.is_parent_of(sn, finish):
+ return [sn]+self.trace(sn, finish)
class Node(object):
@@ -128,7 +128,7 @@
self.prev=None
self.succ=[]
- def set_id(self,id):
+ def set_id(self, id):
"""Sets the id of a node, if not set yet."""
if self.id is not None:
raise NodeException('Node id cannot be changed.')
@@ -146,24 +146,24 @@
"""Returns the id of the node's predecessor."""
return self.prev
- def add_succ(self,id):
+ def add_succ(self, id):
"""Adds a node id to the node's successors."""
- if isinstance(id,type([])):
+ if isinstance(id, type([])):
self.succ.extend(id)
else:
self.succ.append(id)
- def remove_succ(self,id):
+ def remove_succ(self, id):
"""Removes a node id from the node's successors."""
self.succ.remove(id)
- def set_succ(self,new_succ):
+ def set_succ(self, new_succ):
"""Sets the node's successors."""
- if not isinstance(new_succ,type([])):
+ if not isinstance(new_succ, type([])):
raise NodeException('Node successor must be of list type.')
self.succ=new_succ
- def set_prev(self,id):
+ def set_prev(self, id):
"""Sets the node's predecessor."""
self.prev=id
@@ -171,6 +171,6 @@
"""Returns a node's data."""
return self.data
- def set_data(self,data):
+ def set_data(self, data):
"""Sets a node's data."""
self.data=data
diff -Nru python-biopython-1.62/Bio/Nexus/Trees.py python-biopython-1.63/Bio/Nexus/Trees.py
--- python-biopython-1.62/Bio/Nexus/Trees.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Nexus/Trees.py 2013-12-05 14:10:43.000000000 +0000
@@ -12,9 +12,11 @@
nodes).
"""
+from __future__ import print_function
+
import random
import sys
-import Nodes
+from . import Nodes
PRECISION_BRANCHLENGTH=6
PRECISION_SUPPORT=6
@@ -61,14 +63,14 @@
self.root = self.add(root)
if tree: # use the tree we have
# if Tree is called from outside Nexus parser, we need to get rid of linebreaks, etc
- tree=tree.strip().replace('\n','').replace('\r','')
+ tree=tree.strip().replace('\n', '').replace('\r', '')
# there's discrepancy whether newick allows semicolons et the end
tree=tree.rstrip(';')
subtree_info, base_info = self._parse(tree)
root.data = self._add_nodedata(root.data, [[], base_info])
- self._add_subtree(parent_id=root.id,tree=subtree_info)
+ self._add_subtree(parent_id=root.id, tree=subtree_info)
- def _parse(self,tree):
+ def _parse(self, tree):
"""Parses (a,b,c...)[[[xx]:]yy] into subcomponents and travels down recursively."""
#Remove any leading/trailing white space - want any string starting
#with " (..." should be recognised as a leaf, "(..."
@@ -80,15 +82,15 @@
nodecomment=tree.find(NODECOMMENT_START)
colon=tree.find(':')
if colon==-1 and nodecomment==-1: # none
- return [tree,[None]]
+ return [tree, [None]]
elif colon==-1 and nodecomment>-1: # only special comment
- return [tree[:nodecomment],self._get_values(tree[nodecomment:])]
+ return [tree[:nodecomment], self._get_values(tree[nodecomment:])]
elif colon>-1 and nodecomment==-1: # only numerical values
- return [tree[:colon],self._get_values(tree[colon+1:])]
+ return [tree[:colon], self._get_values(tree[colon+1:])]
elif colon < nodecomment: # taxon name ends at first colon or with special comment
- return [tree[:colon],self._get_values(tree[colon+1:])]
+ return [tree[:colon], self._get_values(tree[colon+1:])]
else:
- return [tree[:nodecomment],self._get_values(tree[nodecomment:])]
+ return [tree[:nodecomment], self._get_values(tree[nodecomment:])]
else:
closing=tree.rfind(')')
val=self._get_values(tree[closing+1:])
@@ -97,7 +99,7 @@
subtrees=[]
plevel=0
prev=1
- for p in range(1,closing):
+ for p in range(1, closing):
if tree[p]=='(':
plevel+=1
elif tree[p]==')':
@@ -107,7 +109,7 @@
prev=p+1
subtrees.append(tree[prev:closing])
subclades=[self._parse(subtree) for subtree in subtrees]
- return [subclades,val]
+ return [subclades, val]
def _add_subtree(self,parent_id=None,tree=None):
"""Adds leaf or tree (in newick format) to a parent_id."""
@@ -116,19 +118,19 @@
for st in tree:
nd=self.dataclass()
nd = self._add_nodedata(nd, st)
- if type(st[0])==list: # it's a subtree
+ if isinstance(st[0], list): # it's a subtree
sn=Nodes.Node(nd)
- self.add(sn,parent_id)
- self._add_subtree(sn.id,st[0])
+ self.add(sn, parent_id)
+ self._add_subtree(sn.id, st[0])
else: # it's a leaf
nd.taxon=st[0]
leaf=Nodes.Node(nd)
- self.add(leaf,parent_id)
+ self.add(leaf, parent_id)
def _add_nodedata(self, nd, st):
"""Add data to the node parsed from the comments, taxon and support.
"""
- if isinstance(st[1][-1],str) and st[1][-1].startswith(NODECOMMENT_START):
+ if isinstance(st[1][-1], str) and st[1][-1].startswith(NODECOMMENT_START):
nd.comment=st[1].pop(-1)
# if the first element is a string, it's the subtree node taxon
elif isinstance(st[1][0], str):
@@ -188,7 +190,7 @@
for sn in self._walk(n):
yield sn
- def node(self,node_id):
+ def node(self, node_id):
"""Return the instance of node_id.
node = node(self,node_id)
@@ -214,20 +216,20 @@
if parent_data.taxon:
node.data.taxon=parent_data.taxon+str(i)
node.data.branchlength=branchlength
- ids.append(self.add(node,parent_id))
+ ids.append(self.add(node, parent_id))
return ids
- def search_taxon(self,taxon):
+ def search_taxon(self, taxon):
"""Returns the first matching taxon in self.data.taxon. Not restricted to terminal nodes.
node_id = search_taxon(self,taxon)
"""
- for id,node in self.chain.iteritems():
+ for id, node in self.chain.items():
if node.data.taxon==taxon:
return id
return None
- def prune(self,taxon):
+ def prune(self, taxon):
"""Prunes a terminal taxon from the tree.
id_of_previous_node = prune(self,taxon)
@@ -281,15 +283,15 @@
"""Return a list of all terminal nodes."""
return [i for i in self.all_ids() if self.node(i).succ==[]]
- def is_terminal(self,node):
+ def is_terminal(self, node):
"""Returns True if node is a terminal node."""
return self.node(node).succ==[]
- def is_internal(self,node):
+ def is_internal(self, node):
"""Returns True if node is an internal node."""
return len(self.node(node).succ)>0
- def is_preterminal(self,node):
+ def is_preterminal(self, node):
"""Returns True if all successors of a node are terminal ones."""
if self.is_terminal(node):
return False not in [self.is_terminal(n) for n in self.node(node).succ]
@@ -313,9 +315,9 @@
genera=[]
for t in taxa:
if space_equals_underscore:
- t=t.replace(' ','_')
+ t=t.replace(' ', '_')
try:
- genus=t.split('_',1)[0]
+ genus=t.split('_', 1)[0]
except:
genus='None'
if genus not in genera:
@@ -347,7 +349,7 @@
node=self.node(node).prev
return blen
- def set_subtree(self,node):
+ def set_subtree(self, node):
"""Return subtree as a set of nested sets.
sets = set_subtree(self,node)
@@ -357,16 +359,16 @@
return self.node(node).data.taxon
else:
try:
- return frozenset([self.set_subtree(n) for n in self.node(node).succ])
+ return frozenset(self.set_subtree(n) for n in self.node(node).succ)
except:
- print node
- print self.node(node).succ
+ print(node)
+ print(self.node(node).succ)
for n in self.node(node).succ:
- print n, self.set_subtree(n)
- print [self.set_subtree(n) for n in self.node(node).succ]
+ print("%s %s" % (n, self.set_subtree(n)))
+ print([self.set_subtree(n) for n in self.node(node).succ])
raise
- def is_identical(self,tree2):
+ def is_identical(self, tree2):
"""Compare tree and tree2 for identity.
result = is_identical(self,tree2)
@@ -384,55 +386,55 @@
missing1=set(tree2.get_taxa())-set(self.get_taxa())
if strict and (missing1 or missing2):
if missing1:
- print 'Taxon/taxa %s is/are missing in tree %s' % (','.join(missing1) , self.name)
+ print('Taxon/taxa %s is/are missing in tree %s' % (','.join(missing1), self.name))
if missing2:
- print 'Taxon/taxa %s is/are missing in tree %s' % (','.join(missing2) , tree2.name)
+ print('Taxon/taxa %s is/are missing in tree %s' % (','.join(missing2), tree2.name))
raise TreeError('Can\'t compare trees with different taxon compositions.')
- t1=[(set(self.get_taxa(n)),self.node(n).data.support) for n in self.all_ids() if
+ t1=[(set(self.get_taxa(n)), self.node(n).data.support) for n in self.all_ids() if
self.node(n).succ and
(self.node(n).data and self.node(n).data.support and self.node(n).data.support>=threshold)]
- t2=[(set(tree2.get_taxa(n)),tree2.node(n).data.support) for n in tree2.all_ids() if
+ t2=[(set(tree2.get_taxa(n)), tree2.node(n).data.support) for n in tree2.all_ids() if
tree2.node(n).succ and
(tree2.node(n).data and tree2.node(n).data.support and tree2.node(n).data.support>=threshold)]
conflict=[]
- for (st1,sup1) in t1:
- for (st2,sup2) in t2:
+ for (st1, sup1) in t1:
+ for (st2, sup2) in t2:
if not st1.issubset(st2) and not st2.issubset(st1): # don't hiccup on upstream nodes
- intersect,notin1,notin2=st1 & st2, st2-st1, st1-st2 # all three are non-empty sets
+ intersect, notin1, notin2=st1 & st2, st2-st1, st1-st2 # all three are non-empty sets
# if notin1==missing1 or notin2==missing2 <==> st1.issubset(st2) or st2.issubset(st1) ???
if intersect and not (notin1.issubset(missing1) or notin2.issubset(missing2)): # omit conflicts due to missing taxa
- conflict.append((st1,sup1,st2,sup2,intersect,notin1,notin2))
+ conflict.append((st1, sup1, st2, sup2, intersect, notin1, notin2))
return conflict
- def common_ancestor(self,node1,node2):
+ def common_ancestor(self, node1, node2):
"""Return the common ancestor that connects two nodes.
node_id = common_ancestor(self,node1,node2)
"""
- l1=[self.root]+self.trace(self.root,node1)
- l2=[self.root]+self.trace(self.root,node2)
+ l1=[self.root]+self.trace(self.root, node1)
+ l2=[self.root]+self.trace(self.root, node2)
return [n for n in l1 if n in l2][-1]
- def distance(self,node1,node2):
+ def distance(self, node1, node2):
"""Add and return the sum of the branchlengths between two nodes.
dist = distance(self,node1,node2)
"""
- ca=self.common_ancestor(node1,node2)
- return self.sum_branchlength(ca,node1)+self.sum_branchlength(ca,node2)
+ ca=self.common_ancestor(node1, node2)
+ return self.sum_branchlength(ca, node1)+self.sum_branchlength(ca, node2)
- def is_monophyletic(self,taxon_list):
+ def is_monophyletic(self, taxon_list):
"""Return node_id of common ancestor if taxon_list is monophyletic, -1 otherwise.
result = is_monophyletic(self,taxon_list)
"""
- if isinstance(taxon_list,str):
+ if isinstance(taxon_list, str):
taxon_set=set([taxon_list])
else:
taxon_set=set(taxon_list)
node_id=self.root
- while 1:
+ while True:
subclade_taxa=set(self.get_taxa(node_id))
if subclade_taxa==taxon_set: # are we there?
return node_id
@@ -470,7 +472,7 @@
self.node(n).data.support=self.node(n).data.branchlength
self.node(n).data.branchlength=0.0
- def convert_absolute_support(self,nrep):
+ def convert_absolute_support(self, nrep):
"""Convert absolute support (clade-count) to rel. frequencies.
Some software (e.g. PHYLIP consense) just calculate how often clades appear, instead of
@@ -509,11 +511,11 @@
# bifurcate randomly at terminal nodes until ntax is reached
while len(terminals)1:
@@ -760,16 +761,16 @@
# no outgroup specified: use the smallest clade of the root
if outgroup is None:
try:
- succnodes=self.node(self.root).succ
- smallest=min([(len(self.get_taxa(n)),n) for n in succnodes])
- outgroup=self.get_taxa(smallest[1])
+ succnodes = self.node(self.root).succ
+ smallest = min((len(self.get_taxa(n)), n) for n in succnodes)
+ outgroup = self.get_taxa(smallest[1])
except:
raise TreeError("Error determining outgroup.")
else: # root with user specified outgroup
self.root_with_outgroup(outgroup)
if bstrees: # calculate consensus
- constree=consensus(bstrees,threshold=threshold,outgroup=outgroup)
+ constree=consensus(bstrees, threshold=threshold, outgroup=outgroup)
else:
if not constree.has_support():
constree.branchlength2support()
@@ -798,13 +799,12 @@
for t in trees:
c+=1
#if c%100==0:
- # print c
+ # print(c)
if alltaxa!=set(t.get_taxa()):
raise TreeError('Trees for consensus must contain the same taxa')
t.root_with_outgroup(outgroup=outgroup)
for st_node in t._walk(t.root):
- subclade_taxa=t.get_taxa(st_node)
- subclade_taxa.sort()
+ subclade_taxa=sorted(t.get_taxa(st_node))
subclade_taxa=str(subclade_taxa) # lists are not hashable
if subclade_taxa in clades:
clades[subclade_taxa]+=float(t.weight)/total
@@ -815,13 +815,13 @@
#else:
# countclades[subclade_taxa]=t.weight
# weed out clades below threshold
- delclades=[c for c,p in clades.iteritems() if round(p,3)>> for (res, property) in iter(map):
- ... print res, property
+ ... print(res, property)
@return: iterator
"""
@@ -133,3 +135,4 @@
if isinstance(res_id, int):
ent_id=(chain_id, (' ', res_id, ' '), atom_name, icode)
return ent_id
+
diff -Nru python-biopython-1.62/Bio/PDB/Atom.py python-biopython-1.63/Bio/PDB/Atom.py
--- python-biopython-1.62/Bio/PDB/Atom.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/Atom.py 2013-12-05 14:10:43.000000000 +0000
@@ -124,7 +124,7 @@
@type other: L{Atom}
"""
diff=self.coord-other.coord
- return numpy.sqrt(numpy.dot(diff,diff))
+ return numpy.sqrt(numpy.dot(diff, diff))
# set methods
@@ -262,8 +262,8 @@
Apply rotation and translation to the atomic coordinates.
Example:
- >>> rotation=rotmat(pi, Vector(1,0,0))
- >>> translation=array((0,0,1), 'f')
+ >>> rotation=rotmat(pi, Vector(1, 0, 0))
+ >>> translation=array((0, 0, 1), 'f')
>>> atom.transform(rotation, translation)
@param rot: A right multiplying rotation matrix
@@ -281,8 +281,8 @@
@return: coordinates as 3D vector
@rtype: Vector
"""
- x,y,z=self.coord
- return Vector(x,y,z)
+ x, y, z=self.coord
+ return Vector(x, y, z)
def copy(self):
"""
@@ -333,3 +333,4 @@
if occupancy>self.last_occupancy:
self.last_occupancy=occupancy
self.disordered_select(altloc)
+
diff -Nru python-biopython-1.62/Bio/PDB/DSSP.py python-biopython-1.63/Bio/PDB/DSSP.py
--- python-biopython-1.62/Bio/PDB/DSSP.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/DSSP.py 2013-12-05 14:10:43.000000000 +0000
@@ -6,7 +6,7 @@
"""Use the DSSP program to calculate secondary structure and accessibility.
You need to have a working version of DSSP (and a license, free for academic
-use) in order to use this. For DSSP, see U{http://www.cmbi.kun.nl/gv/dssp/}.
+use) in order to use this. For DSSP, see U{http://swift.cmbi.ru.nl/gv/dssp/}.
The DSSP codes for secondary structure used here are:
@@ -20,10 +20,12 @@
- - None
"""
-from __future__ import with_statement
+from __future__ import print_function
+
+__docformat__ = "epytext en"
import re
-from StringIO import StringIO
+from Bio._py3k import StringIO
import subprocess
from Bio.Data import SCOPData
@@ -114,7 +116,6 @@
@param filename: the DSSP output file
@type filename: string
"""
- handle = open(filename, "r")
with open(filename, "r") as handle:
return _make_dssp_dict(handle)
@@ -123,15 +124,15 @@
Return a DSSP dictionary that maps (chainid, resid) to
aa, ss and accessibility, from an open DSSP file object.
- @param filename: the open DSSP output file
- @type filename: file
+ @param handle: the open DSSP output file handle
+ @type handle: file
"""
dssp = {}
start = 0
keys = []
for l in handle.readlines():
sl = l.split()
- if not sl:
+ if len(sl) < 2:
continue
if sl[1] == "RESIDUE":
# Start parsing from here
@@ -153,7 +154,7 @@
acc = int(l[34:38])
phi = float(l[103:109])
psi = float(l[109:115])
- except ValueError, exc:
+ except ValueError as exc:
# DSSP output breaks its own format when there are >9999
# residues, since only 4 digits are allocated to the seq num
# field. See 3kic chain T res 321, 1vsy chain T res 6077.
@@ -186,7 +187,7 @@
>>> model = structure[0]
>>> dssp = DSSP(model, "1MOT.pdb")
>>> # DSSP data is accessed by a tuple (chain_id, res_id)
- >>> a_key = dssp.keys()[2]
+ >>> a_key = list(dssp.keys())[2]
>>> # residue object, secondary structure, solvent accessibility,
>>> # relative accessiblity, phi, psi
>>> dssp[a_key]
@@ -328,11 +329,11 @@
d = DSSP(model, sys.argv[1])
for r in d:
- print r
- print "Handled", len(d), "residues"
- print d.keys()
+ print(r)
+ print("Handled %i residues" % len(d))
+ print(d.keys())
if ('A', 1) in d:
- print d[('A', 1)]
- print s[0]['A'][1].xtra
+ print(d[('A', 1)])
+ print(s[0]['A'][1].xtra)
# Secondary structure
- print ''.join(d[key][1] for key in d.keys())
+ print(''.join(item[1] for item in d))
diff -Nru python-biopython-1.62/Bio/PDB/Entity.py python-biopython-1.63/Bio/PDB/Entity.py
--- python-biopython-1.62/Bio/PDB/Entity.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/Entity.py 2013-12-05 14:10:43.000000000 +0000
@@ -156,8 +156,8 @@
Apply rotation and translation to the atomic coordinates.
Example:
- >>> rotation=rotmat(pi, Vector(1,0,0))
- >>> translation=array((0,0,1), 'f')
+ >>> rotation=rotmat(pi, Vector(1, 0, 0))
+ >>> translation=array((0, 0, 1), 'f')
>>> entity.transform(rotation, translation)
@param rot: A right multiplying rotation matrix
@@ -280,10 +280,8 @@
def disordered_get_id_list(self):
"Return a list of id's."
- l=self.child_dict.keys()
# sort id list alphabetically
- l.sort()
- return l
+ return sorted(self.child_dict)
def disordered_get(self, id=None):
"""Get the child object associated with id.
@@ -296,4 +294,5 @@
def disordered_get_list(self):
"Return list of children."
- return self.child_dict.values()
+ return list(self.child_dict.values())
+
diff -Nru python-biopython-1.62/Bio/PDB/FragmentMapper.py python-biopython-1.63/Bio/PDB/FragmentMapper.py
--- python-biopython-1.62/Bio/PDB/FragmentMapper.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/FragmentMapper.py 2013-12-05 14:10:43.000000000 +0000
@@ -30,6 +30,8 @@
>>> fragment = fm[residue]
"""
+from __future__ import print_function
+
import numpy
from Bio.SVDSuperimposer import SVDSuperimposer
@@ -62,27 +64,26 @@
@type dir: string
"""
filename=(dir+"/"+_FRAGMENT_FILE) % (size, length)
- fp=open(filename, "r")
- flist=[]
- # ID of fragment=rank in spec file
- fid=0
- for l in fp.readlines():
- # skip comment and blank lines
- if l[0]=="*" or l[0]=="\n":
- continue
- sl=l.split()
- if sl[1]=="------":
- # Start of fragment definition
- f=Fragment(length, fid)
- flist.append(f)
- # increase fragment id (rank)
- fid+=1
- continue
- # Add CA coord to Fragment
- coord=numpy.array(map(float, sl[0:3]))
- # XXX= dummy residue name
- f.add_residue("XXX", coord)
- fp.close()
+ with open(filename, "r") as fp:
+ flist=[]
+ # ID of fragment=rank in spec file
+ fid=0
+ for l in fp.readlines():
+ # skip comment and blank lines
+ if l[0]=="*" or l[0]=="\n":
+ continue
+ sl=l.split()
+ if sl[1]=="------":
+ # Start of fragment definition
+ f=Fragment(length, fid)
+ flist.append(f)
+ # increase fragment id (rank)
+ fid+=1
+ continue
+ # Add CA coord to Fragment
+ coord = numpy.array([float(x) for x in sl[0:3]])
+ # XXX= dummy residue name
+ f.add_residue("XXX", coord)
return flist
@@ -284,7 +285,7 @@
index=i-self.edge
assert(index>=0)
fd[res]=mflist[index]
- except PDBException, why:
+ except PDBException as why:
if why == 'CHAINBREAK':
# Funny polypeptide - skip
pass
@@ -323,16 +324,12 @@
import sys
- p=PDBParser()
- s=p.get_structure("X", sys.argv[1])
-
- m=s[0]
- fm=FragmentMapper(m, 10, 5, "levitt_data")
+ p = PDBParser()
+ s = p.get_structure("X", sys.argv[1])
+ m = s[0]
+ fm = FragmentMapper(m, 10, 5, "levitt_data")
for r in Selection.unfold_entities(m, "R"):
-
- print r,
+ print("%s:" % r)
if r in fm:
- print fm[r]
- else:
- print
+ print(fm[r])
diff -Nru python-biopython-1.62/Bio/PDB/HSExposure.py python-biopython-1.63/Bio/PDB/HSExposure.py
--- python-biopython-1.62/Bio/PDB/HSExposure.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/HSExposure.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,6 +5,8 @@
"""Half-sphere exposure and coordination number calculation."""
+from __future__ import print_function
+
import warnings
from math import pi
@@ -207,20 +209,19 @@
if len(self.ca_cb_list)==0:
warnings.warn("Nothing to draw.", RuntimeWarning)
return
- fp=open(filename, "w")
- fp.write("from pymol.cgo import *\n")
- fp.write("from pymol import cmd\n")
- fp.write("obj=[\n")
- fp.write("BEGIN, LINES,\n")
- fp.write("COLOR, %.2f, %.2f, %.2f,\n" % (1.0, 1.0, 1.0))
- for (ca, cb) in self.ca_cb_list:
- x,y,z=ca.get_array()
- fp.write("VERTEX, %.2f, %.2f, %.2f,\n" % (x,y,z))
- x,y,z=cb.get_array()
- fp.write("VERTEX, %.2f, %.2f, %.2f,\n" % (x,y,z))
- fp.write("END]\n")
- fp.write("cmd.load_cgo(obj, 'HS')\n")
- fp.close()
+ with open(filename, "w") as fp:
+ fp.write("from pymol.cgo import *\n")
+ fp.write("from pymol import cmd\n")
+ fp.write("obj=[\n")
+ fp.write("BEGIN, LINES,\n")
+ fp.write("COLOR, %.2f, %.2f, %.2f,\n" % (1.0, 1.0, 1.0))
+ for (ca, cb) in self.ca_cb_list:
+ x, y, z=ca.get_array()
+ fp.write("VERTEX, %.2f, %.2f, %.2f,\n" % (x, y, z))
+ x, y, z=cb.get_array()
+ fp.write("VERTEX, %.2f, %.2f, %.2f,\n" % (x, y, z))
+ fp.write("END]\n")
+ fp.write("cmd.load_cgo(obj, 'HS')\n")
class HSExposureCB(_AbstractHSExposure):
@@ -324,22 +325,22 @@
hse=HSExposureCA(model, radius=RADIUS, offset=OFFSET)
for l in hse:
- print l
- print
+ print(l)
+ print("")
hse=HSExposureCB(model, radius=RADIUS, offset=OFFSET)
for l in hse:
- print l
- print
+ print(l)
+ print("")
hse=ExposureCN(model, radius=RADIUS, offset=OFFSET)
for l in hse:
- print l
- print
+ print(l)
+ print("")
for c in model:
for r in c:
try:
- print r.xtra['PCB_CB_ANGLE']
+ print(r.xtra['PCB_CB_ANGLE'])
except:
pass
diff -Nru python-biopython-1.62/Bio/PDB/MMCIF2Dict.py python-biopython-1.63/Bio/PDB/MMCIF2Dict.py
--- python-biopython-1.62/Bio/PDB/MMCIF2Dict.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/MMCIF2Dict.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,43 +5,46 @@
"""Turn an mmCIF file into a dictionary."""
+from __future__ import print_function
+
+from Bio._py3k import input as _input
+
import shlex
class MMCIF2Dict(dict):
def __init__(self, filename):
- handle = open(filename)
- loop_flag = False
- key = None
- tokens = self._tokenize(handle)
- token = tokens.next()
- self[token[0:5]]=token[5:]
- for token in tokens:
- if token=="loop_":
- loop_flag = True
- keys = []
- i = 0
- n = 0
- continue
- elif loop_flag:
- if token.startswith("_"):
- if i > 0:
- loop_flag = False
+ with open(filename) as handle:
+ loop_flag = False
+ key = None
+ tokens = self._tokenize(handle)
+ token = next(tokens)
+ self[token[0:5]]=token[5:]
+ for token in tokens:
+ if token=="loop_":
+ loop_flag = True
+ keys = []
+ i = 0
+ n = 0
+ continue
+ elif loop_flag:
+ if token.startswith("_"):
+ if i > 0:
+ loop_flag = False
+ else:
+ self[token] = []
+ keys.append(token)
+ n += 1
+ continue
else:
- self[token] = []
- keys.append(token)
- n += 1
+ self[keys[i%n]].append(token)
+ i+=1
continue
+ if key is None:
+ key = token
else:
- self[keys[i%n]].append(token)
- i+=1
- continue
- if key is None:
- key = token
- else:
- self[key] = token
- key = None
- handle.close()
+ self[key] = token
+ key = None
def _tokenize(self, handle):
for line in handle:
@@ -66,28 +69,28 @@
import sys
if len(sys.argv)!=2:
- print "Usage: python MMCIF2Dict filename."
+ print("Usage: python MMCIF2Dict filename.")
filename=sys.argv[1]
mmcif_dict = MMCIF2Dict(filename)
entry = ""
- print "Now type a key ('q' to end, 'k' for a list of all keys):"
+ print("Now type a key ('q' to end, 'k' for a list of all keys):")
while(entry != "q"):
- entry = raw_input("MMCIF dictionary key ==> ")
+ entry = _input("MMCIF dictionary key ==> ")
if entry == "q":
sys.exit()
if entry == "k":
for key in mmcif_dict:
- print key
+ print(key)
continue
try:
value=mmcif_dict[entry]
if isinstance(value, list):
for item in value:
- print item
+ print(item)
else:
- print value
+ print(value)
except KeyError:
- print "No such key found."
+ print("No such key found.")
diff -Nru python-biopython-1.62/Bio/PDB/MMCIFParser.py python-biopython-1.63/Bio/PDB/MMCIFParser.py
--- python-biopython-1.62/Bio/PDB/MMCIFParser.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/MMCIFParser.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,10 +5,14 @@
"""mmCIF parser"""
+from __future__ import print_function
+
from string import ascii_letters
import numpy
+from Bio._py3k import range
+
from Bio.PDB.MMCIF2Dict import MMCIF2Dict
from Bio.PDB.StructureBuilder import StructureBuilder
from Bio.PDB.PDBExceptions import PDBConstructionException
@@ -31,9 +35,9 @@
element_list = None
seq_id_list=mmcif_dict["_atom_site.label_seq_id"]
chain_id_list=mmcif_dict["_atom_site.label_asym_id"]
- x_list=map(float, mmcif_dict["_atom_site.Cartn_x"])
- y_list=map(float, mmcif_dict["_atom_site.Cartn_y"])
- z_list=map(float, mmcif_dict["_atom_site.Cartn_z"])
+ x_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_x"]]
+ y_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_y"]]
+ z_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_z"]]
alt_list=mmcif_dict["_atom_site.label_alt_id"]
b_factor_list=mmcif_dict["_atom_site.B_iso_or_equiv"]
occupancy_list=mmcif_dict["_atom_site.occupancy"]
@@ -73,7 +77,7 @@
# so serial_id means the Model ID specified in the file
current_model_id = 0
current_serial_id = 0
- for i in xrange(0, len(atom_id_list)):
+ for i in range(0, len(atom_id_list)):
x=x_list[i]
y=y_list[i]
z=z_list[i]
@@ -128,7 +132,7 @@
if aniso_flag==1:
u=(aniso_u11[i], aniso_u12[i], aniso_u13[i],
aniso_u22[i], aniso_u23[i], aniso_u33[i])
- mapped_anisou=map(float, u)
+ mapped_anisou = [float(x) for x in u]
anisou_array=numpy.array(mapped_anisou, 'f')
structure_builder.set_anisou(anisou_array)
# Now try to set the cell
@@ -165,7 +169,7 @@
import sys
if len(sys.argv) != 2:
- print "Usage: python MMCIFparser.py filename"
+ print("Usage: python MMCIFparser.py filename")
raise SystemExit
filename=sys.argv[1]
@@ -174,7 +178,7 @@
structure=p.get_structure("test", filename)
for model in structure.get_list():
- print model
+ print(model)
for chain in model.get_list():
- print chain
- print "Found %d residues." % len(chain.get_list())
+ print(chain)
+ print("Found %d residues." % len(chain.get_list()))
diff -Nru python-biopython-1.62/Bio/PDB/NACCESS.py python-biopython-1.63/Bio/PDB/NACCESS.py
--- python-biopython-1.62/Bio/PDB/NACCESS.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/NACCESS.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,8 +5,12 @@
# NACCESS interface adapted from Bio/PDB/DSSP.py
+from __future__ import print_function
+
import os
import tempfile
+import shutil
+import subprocess
from Bio.PDB.PDBIO import PDBIO
from Bio.PDB.AbstractPropertyMap import AbstractResiduePropertyMap, AbstractAtomPropertyMap
@@ -22,52 +26,51 @@
"""
-def run_naccess(model, pdb_file, probe_size = None, z_slice = None,
- naccess = 'naccess', temp_path = '/tmp/'):
+def run_naccess(model, pdb_file, probe_size=None, z_slice=None,
+ naccess='naccess', temp_path='/tmp/'):
- # make temp directory; chdir to temp directory,
- # as NACCESS writes to current working directory
- tmp_path = tempfile.mktemp(dir = temp_path)
- os.mkdir(tmp_path)
- old_dir = os.getcwd()
- os.chdir(tmp_path)
+ # make temp directory;
+ tmp_path = tempfile.mkdtemp(dir=temp_path)
# file name must end with '.pdb' to work with NACCESS
# -> create temp file of existing pdb
# or write model to temp file
- tmp_pdb_file = tempfile.mktemp('.pdb', dir = tmp_path)
+ handle, tmp_pdb_file = tempfile.mkstemp('.pdb', dir=tmp_path)
+ os.close(handle)
if pdb_file:
- os.system('cp %s %s' % (pdb_file, tmp_pdb_file))
+ pdb_file = os.path.abspath(pdb_file)
+ shutil.copy(pdb_file, tmp_pdb_file)
else:
writer = PDBIO()
writer.set_structure(model.get_parent())
writer.save(tmp_pdb_file)
+ # chdir to temp directory, as NACCESS writes to current working directory
+ old_dir = os.getcwd()
+ os.chdir(tmp_path)
+
# create the command line and run
# catch standard out & err
- command = '%s %s ' % (naccess, tmp_pdb_file)
+ command = [naccess, tmp_pdb_file]
if probe_size:
- command += '-p %s ' % probe_size
+ command.extend(['-p', probe_size])
if z_slice:
- command += '-z %s ' % z_slice
- in_, out, err = os.popen3(command)
- in_.close()
- stdout = out.readlines()
- out.close()
- stderr = err.readlines()
- err.close()
+ command.extend(['-z', z_slice])
+
+ p = subprocess.Popen(command, universal_newlines=True,
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ out, err = p.communicate()
+ os.chdir(old_dir)
# get the output, then delete the temp directory
rsa_file = tmp_pdb_file[:-4] + '.rsa'
- rf = open(rsa_file)
- rsa_data = rf.readlines()
- rf.close()
+ with open(rsa_file) as rf:
+ rsa_data = rf.readlines()
asa_file = tmp_pdb_file[:-4] + '.asa'
- af = open(asa_file)
- asa_data = af.readlines()
- af.close()
- os.chdir(old_dir)
- os.system('rm -rf %s >& /dev/null' % tmp_path)
+ with open(asa_file) as af:
+ asa_data = af.readlines()
+
+ shutil.rmtree(tmp_path, ignore_errors=True)
return rsa_data, asa_data
@@ -92,7 +95,7 @@
'non_polar_abs': float(line[55:61]),
'non_polar_rel': float(line[62:67]),
'all_polar_abs': float(line[68:74]),
- 'all_polar_rel': float(line[75:80]) }
+ 'all_polar_rel': float(line[75:80])}
return naccess_rel_dict
@@ -118,20 +121,21 @@
class NACCESS(AbstractResiduePropertyMap):
- def __init__(self, model, pdb_file = None,
- naccess_binary = 'naccess', tmp_directory = '/tmp'):
- res_data, atm_data = run_naccess(model, pdb_file, naccess = naccess_binary,
- temp_path = tmp_directory)
+ def __init__(self, model, pdb_file=None,
+ naccess_binary='naccess', tmp_directory='/tmp'):
+ res_data, atm_data = run_naccess(model, pdb_file,
+ naccess=naccess_binary,
+ temp_path=tmp_directory)
naccess_dict = process_rsa_data(res_data)
res_list = []
- property_dict={}
- property_keys=[]
- property_list=[]
+ property_dict = {}
+ property_keys = []
+ property_list = []
# Now create a dictionary that maps Residue objects to accessibility
for chain in model:
- chain_id=chain.get_id()
+ chain_id = chain.get_id()
for res in chain:
- res_id=res.get_id()
+ res_id = res.get_id()
if (chain_id, res_id) in naccess_dict:
item = naccess_dict[(chain_id, res_id)]
res_name = item['res_name']
@@ -139,24 +143,24 @@
property_dict[(chain_id, res_id)] = item
property_keys.append((chain_id, res_id))
property_list.append((res, item))
- res.xtra["EXP_NACCESS"]=item
+ res.xtra["EXP_NACCESS"] = item
else:
pass
AbstractResiduePropertyMap.__init__(self, property_dict, property_keys,
- property_list)
+ property_list)
class NACCESS_atomic(AbstractAtomPropertyMap):
- def __init__(self, model, pdb_file = None,
- naccess_binary = 'naccess', tmp_directory = '/tmp'):
- res_data, atm_data = run_naccess(model, pdb_file, naccess = naccess_binary,
- temp_path = tmp_directory)
+ def __init__(self, model, pdb_file=None,
+ naccess_binary='naccess', tmp_directory='/tmp'):
+ res_data, atm_data = run_naccess(model, pdb_file,
+ naccess=naccess_binary,
+ temp_path=tmp_directory)
self.naccess_atom_dict = process_asa_data(atm_data)
- atom_list = []
- property_dict={}
- property_keys=[]
- property_list=[]
+ property_dict = {}
+ property_keys = []
+ property_list = []
# Now create a dictionary that maps Atom objects to accessibility
for chain in model:
chain_id = chain.get_id()
@@ -164,25 +168,25 @@
res_id = residue.get_id()
for atom in residue:
atom_id = atom.get_id()
- full_id=(chain_id, res_id, atom_id)
+ full_id = (chain_id, res_id, atom_id)
if full_id in self.naccess_atom_dict:
asa = self.naccess_atom_dict[full_id]
- property_dict[full_id]=asa
+ property_dict[full_id] = asa
property_keys.append((full_id))
property_list.append((atom, asa))
- atom.xtra['EXP_NACCESS']=asa
- AbstractAtomPropertyMap.__init__(self, property_dict, property_keys,
- property_list)
+ atom.xtra['EXP_NACCESS'] = asa
+ AbstractAtomPropertyMap.__init__(self, property_dict,
+ property_keys, property_list)
-if __name__=="__main__":
+if __name__ == "__main__":
import sys
from Bio.PDB import PDBParser
- p=PDBParser()
- s=p.get_structure('X', sys.argv[1])
- model=s[0]
+ p = PDBParser()
+ s = p.get_structure('X', sys.argv[1])
+ model = s[0]
n = NACCESS(model, sys.argv[1])
- for e in n.get_iterator():
- print e
+ for e in n:
+ print(e)
diff -Nru python-biopython-1.62/Bio/PDB/NeighborSearch.py python-biopython-1.63/Bio/PDB/NeighborSearch.py
--- python-biopython-1.62/Bio/PDB/NeighborSearch.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/NeighborSearch.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,6 +5,8 @@
"""Fast atom neighbor lookup using a KD tree (implemented in C++)."""
+from __future__ import print_function
+
import numpy
from Bio.KDTree import KDTree
@@ -133,7 +135,5 @@
for i in range(0, 20):
#Make a list of 100 atoms
al = [Atom() for j in range(100)]
-
- ns=NeighborSearch(al)
-
- print "Found ", len(ns.search_all(5.0))
+ ns = NeighborSearch(al)
+ print("Found %i" % len(ns.search_all(5.0)))
diff -Nru python-biopython-1.62/Bio/PDB/PDBIO.py python-biopython-1.63/Bio/PDB/PDBIO.py
--- python-biopython-1.62/Bio/PDB/PDBIO.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/PDBIO.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,6 +5,8 @@
"""Output of PDB files."""
+from Bio._py3k import basestring
+
from Bio.PDB.StructureBuilder import StructureBuilder # To allow saving of chains, residues, etc..
from Bio.Data.IUPACData import atom_weights # Allowed Elements
@@ -229,12 +231,11 @@
io.set_structure(s)
io.save("out1.pdb")
- fp=open("out2.pdb", "w")
- s1=p.get_structure("test1", sys.argv[1])
- s2=p.get_structure("test2", sys.argv[2])
- io=PDBIO(1)
- io.set_structure(s1)
- io.save(fp)
- io.set_structure(s2)
- io.save(fp, write_end=1)
- fp.close()
+ with open("out2.pdb", "w") as fp:
+ s1=p.get_structure("test1", sys.argv[1])
+ s2=p.get_structure("test2", sys.argv[2])
+ io=PDBIO(1)
+ io.set_structure(s1)
+ io.save(fp)
+ io.set_structure(s2)
+ io.save(fp, write_end=1)
diff -Nru python-biopython-1.62/Bio/PDB/PDBList.py python-biopython-1.63/Bio/PDB/PDBList.py
--- python-biopython-1.62/Bio/PDB/PDBList.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/PDBList.py 2013-12-05 14:10:43.000000000 +0000
@@ -20,15 +20,16 @@
""" Access the PDB over the internet (e.g. to download structures). """
-# For using with statement in Python 2.5 or Jython
-from __future__ import with_statement
+from __future__ import print_function
import contextlib
import gzip
import os
import shutil
-import urllib
-from urllib2 import urlopen as _urlopen # urllib made too many FTP conn's
+
+#Importing these functions with leading underscore as not intended for reuse
+from Bio._py3k import urlopen as _urlopen
+from Bio._py3k import urlretrieve as _urlretrieve
class PDBList(object):
@@ -125,7 +126,7 @@
PDB entries and some annotation to them.
Returns a list of PDB codes in the index file.
"""
- print "retrieving index file. Takes about 5 MB."
+ print("retrieving index file. Takes about 5 MB.")
url = self.pdb_server + '/pub/pdb/derived_data/index/entries.idx'
with contextlib.closing(_urlopen(url)) as handle:
all_entries = [line[:4] for line in handle.readlines()[2:]
@@ -205,12 +206,12 @@
# Skip download if the file already exists
if not self.overwrite:
if os.path.exists(final_file):
- print "Structure exists: '%s' " % final_file
+ print("Structure exists: '%s' " % final_file)
return final_file
# Retrieve the file
- print "Downloading PDB structure '%s'..." % pdb_code
- urllib.urlretrieve(url, filename)
+ print("Downloading PDB structure '%s'..." % pdb_code)
+ _urlretrieve(url, filename)
# Uncompress the archive, delete when done
#Can't use context manager with gzip.open until Python 2.7
@@ -238,7 +239,7 @@
try:
self.retrieve_pdb_file(pdb_code)
except Exception:
- print 'error %s\n' % pdb_code
+ print('error %s\n' % pdb_code)
# you can insert here some more log notes that
# something has gone wrong.
@@ -259,11 +260,11 @@
try:
shutil.move(old_file, new_file)
except Exception:
- print "Could not move %s to obsolete folder" % old_file
+ print("Could not move %s to obsolete folder" % old_file)
elif os.path.isfile(new_file):
- print "Obsolete file %s already moved" % old_file
+ print("Obsolete file %s already moved" % old_file)
else:
- print "Obsolete file %s is missing" % old_file
+ print("Obsolete file %s is missing" % old_file)
def download_entire_pdb(self, listfile=None):
"""Retrieve all PDB entries not present in the local PDB copy.
@@ -299,9 +300,9 @@
"""Retrieves a (big) file containing all the sequences of PDB entries
and writes it to a file.
"""
- print "Retrieving sequence file (takes about 15 MB)."
+ print("Retrieving sequence file (takes about 15 MB).")
url = self.pdb_server + '/pub/pdb/derived_data/pdb_seqres.txt'
- urllib.urlretrieve(url, savefile)
+ _urlretrieve(url, savefile)
if __name__ == '__main__':
@@ -324,7 +325,7 @@
-d A single directory will be used as , not a tree.
-o Overwrite existing structure files.
"""
- print doc
+ print(doc)
if len(sys.argv) > 2:
pdb_path = sys.argv[2]
@@ -344,7 +345,7 @@
if len(sys.argv) > 1:
if sys.argv[1] == 'update':
# update PDB
- print "updating local PDB at " + pdb_path
+ print("updating local PDB at " + pdb_path)
pl.update_pdb()
elif sys.argv[1] == 'all':
diff -Nru python-biopython-1.62/Bio/PDB/PDBParser.py python-biopython-1.63/Bio/PDB/PDBParser.py
--- python-biopython-1.62/Bio/PDB/PDBParser.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/PDBParser.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,8 +5,7 @@
"""Parser for PDB files."""
-# For using with statement in Python 2.5 or Jython
-from __future__ import with_statement
+from __future__ import print_function
import warnings
@@ -215,24 +214,24 @@
current_resname = resname
try:
structure_builder.init_residue(resname, hetero_flag, resseq, icode)
- except PDBConstructionException, message:
+ except PDBConstructionException as message:
self._handle_PDB_exception(message, global_line_counter)
elif current_residue_id != residue_id or current_resname != resname:
current_residue_id = residue_id
current_resname = resname
try:
structure_builder.init_residue(resname, hetero_flag, resseq, icode)
- except PDBConstructionException, message:
+ except PDBConstructionException as message:
self._handle_PDB_exception(message, global_line_counter)
# init atom
try:
structure_builder.init_atom(name, coord, bfactor, occupancy, altloc,
fullname, serial_number, element)
- except PDBConstructionException, message:
+ except PDBConstructionException as message:
self._handle_PDB_exception(message, global_line_counter)
elif record_type == "ANISOU":
- anisou = map(float, (line[28:35], line[35:42], line[43:49],
- line[49:56], line[56:63], line[63:70]))
+ anisou = [float(x) for x in (line[28:35], line[35:42], line[43:49],
+ line[49:56], line[56:63], line[63:70])]
# U's are scaled by 10^4
anisou_array = (numpy.array(anisou, "f") / 10000.0).astype("f")
structure_builder.set_anisou(anisou_array)
@@ -258,15 +257,15 @@
current_residue_id = None
elif record_type == "SIGUIJ":
# standard deviation of anisotropic B factor
- siguij = map(float, (line[28:35], line[35:42], line[42:49],
- line[49:56], line[56:63], line[63:70]))
+ siguij = [float(x) for x in (line[28:35], line[35:42], line[42:49],
+ line[49:56], line[56:63], line[63:70])]
# U sigma's are scaled by 10^4
siguij_array = (numpy.array(siguij, "f") / 10000.0).astype("f")
structure_builder.set_siguij(siguij_array)
elif record_type == "SIGATM":
# standard deviation of atomic positions
- sigatm = map(float, (line[30:38], line[38:45], line[46:54],
- line[54:60], line[60:66]))
+ sigatm = [float(x) for x in (line[30:38], line[38:45], line[46:54],
+ line[54:60], line[60:66])]
sigatm_array = numpy.array(sigatm, "f")
structure_builder.set_sigatm(sigatm_array)
local_line_counter += 1
@@ -308,10 +307,10 @@
p = c.get_parent()
assert(p is m)
for r in c:
- print r
+ print(r)
p = r.get_parent()
assert(p is c)
for a in r:
p = a.get_parent()
if not p is r:
- print p, r
+ print("%s %s" % (p, r))
diff -Nru python-biopython-1.62/Bio/PDB/PSEA.py python-biopython-1.63/Bio/PDB/PSEA.py
--- python-biopython-1.62/Bio/PDB/PSEA.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/PSEA.py 2013-12-05 14:10:43.000000000 +0000
@@ -41,17 +41,16 @@
fname=run_psea(pname)
start=0
ss=""
- fp=open(fname, 'r')
- for l in fp.readlines():
- if l[0:6]==">p-sea":
- start=1
- continue
- if not start:
- continue
- if l[0]=="\n":
- break
- ss=ss+l[0:-1]
- fp.close()
+ with open(fname, 'r') as fp:
+ for l in fp.readlines():
+ if l[0:6]==">p-sea":
+ start=1
+ continue
+ if not start:
+ continue
+ if l[0]=="\n":
+ break
+ ss=ss+l[0:-1]
return ss
diff -Nru python-biopython-1.62/Bio/PDB/Polypeptide.py python-biopython-1.63/Bio/PDB/Polypeptide.py
--- python-biopython-1.62/Bio/PDB/Polypeptide.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/Polypeptide.py 2013-12-05 14:10:43.000000000 +0000
@@ -12,7 +12,7 @@
>>> structure = PDBParser().get_structure('2BEG', 'PDB/2BEG.pdb')
>>> ppb=PPBuilder()
>>> for pp in ppb.build_peptides(structure):
- ... print pp.get_sequence()
+ ... print(pp.get_sequence())
LVFFAEDVGSNKGAIIGLMVGGVVIA
LVFFAEDVGSNKGAIIGLMVGGVVIA
LVFFAEDVGSNKGAIIGLMVGGVVIA
@@ -27,7 +27,7 @@
>>> structure = PDBParser().get_structure('1A8O', 'PDB/1A8O.pdb')
>>> ppb=PPBuilder()
>>> for pp in ppb.build_peptides(structure):
- ... print pp.get_sequence()
+ ... print(pp.get_sequence())
DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW
TETLLVQNANPDCKTILKALGPGATLEE
TACQG
@@ -35,10 +35,10 @@
If you want to, you can include non-standard amino acids in the peptides:
>>> for pp in ppb.build_peptides(structure, aa_only=False):
- ... print pp.get_sequence()
- ... print pp.get_sequence()[0], pp[0].get_resname()
- ... print pp.get_sequence()[-7], pp[-7].get_resname()
- ... print pp.get_sequence()[-6], pp[-6].get_resname()
+ ... print(pp.get_sequence())
+ ... print("%s %s" % (pp.get_sequence()[0], pp[0].get_resname()))
+ ... print("%s %s" % (pp.get_sequence()[-7], pp[-7].get_resname()))
+ ... print("%s %s" % (pp.get_sequence()[-6], pp[-6].get_resname()))
MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG
M MSE
M MSE
@@ -48,6 +48,9 @@
last residues) have been shown as M (methionine) by the get_sequence method.
"""
+from __future__ import print_function
+from Bio._py3k import basestring
+
import warnings
from Bio.Alphabet import generic_protein
@@ -351,9 +354,9 @@
for chain in chain_list:
chain_it=iter(chain)
try:
- prev_res = chain_it.next()
+ prev_res = next(chain_it)
while not accept(prev_res, aa_only):
- prev_res = chain_it.next()
+ prev_res = next(chain_it)
except StopIteration:
#No interesting residues at all in this chain
continue
@@ -460,24 +463,25 @@
ppb=PPBuilder()
- print "C-N"
+ print("C-N")
for pp in ppb.build_peptides(s):
- print pp.get_sequence()
+ print(pp.get_sequence())
for pp in ppb.build_peptides(s[0]):
- print pp.get_sequence()
+ print(pp.get_sequence())
for pp in ppb.build_peptides(s[0]["A"]):
- print pp.get_sequence()
+ print(pp.get_sequence())
for pp in ppb.build_peptides(s):
for phi, psi in pp.get_phi_psi_list():
- print phi, psi
+ print("%f %f" % (phi, psi))
ppb=CaPPBuilder()
- print "CA-CA"
+ print("CA-CA")
for pp in ppb.build_peptides(s):
- print pp.get_sequence()
+ print(pp.get_sequence())
for pp in ppb.build_peptides(s[0]):
- print pp.get_sequence()
+ print(pp.get_sequence())
for pp in ppb.build_peptides(s[0]["A"]):
- print pp.get_sequence()
+ print(pp.get_sequence())
+
diff -Nru python-biopython-1.62/Bio/PDB/ResidueDepth.py python-biopython-1.63/Bio/PDB/ResidueDepth.py
--- python-biopython-1.62/Bio/PDB/ResidueDepth.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/ResidueDepth.py 2013-12-05 14:10:43.000000000 +0000
@@ -15,7 +15,7 @@
Residue Depth:
>>> rd = ResidueDepth(model, pdb_file)
- >>> print rd[(chain_id, res_id)]
+ >>> print(rd[(chain_id, res_id)])
Direct MSMS interface:
@@ -39,6 +39,8 @@
>>> rd = residue_depth(residue, surface)
"""
+from __future__ import print_function
+
import os
import tempfile
@@ -53,16 +55,15 @@
"""
Read the vertex list into a Numeric array.
"""
- fp=open(filename, "r")
- vertex_list=[]
- for l in fp.readlines():
- sl=l.split()
- if not len(sl)==9:
- # skip header
- continue
- vl=map(float, sl[0:3])
- vertex_list.append(vl)
- fp.close()
+ with open(filename, "r") as fp:
+ vertex_list=[]
+ for l in fp.readlines():
+ sl=l.split()
+ if not len(sl)==9:
+ # skip header
+ continue
+ vl = [float(x) for x in sl[0:3]]
+ vertex_list.append(vl)
return numpy.array(vertex_list)
@@ -173,4 +174,4 @@
rd=ResidueDepth(model, sys.argv[1])
for item in rd:
- print item
+ print(item)
diff -Nru python-biopython-1.62/Bio/PDB/Selection.py python-biopython-1.63/Bio/PDB/Selection.py
--- python-biopython-1.62/Bio/PDB/Selection.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/Selection.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,6 +5,8 @@
"""Selection of atoms, residues, etc."""
+from __future__ import print_function
+
import itertools
from Bio.PDB.Atom import Atom
@@ -79,9 +81,9 @@
def _test():
"""Run the Bio.PDB.Selection module's doctests (PRIVATE)."""
import doctest
- print "Running doctests ..."
+ print("Running doctests ...")
doctest.testmod()
- print "Done"
+ print("Done")
if __name__ == "__main__":
diff -Nru python-biopython-1.62/Bio/PDB/StructureAlignment.py python-biopython-1.63/Bio/PDB/StructureAlignment.py
--- python-biopython-1.62/Bio/PDB/StructureAlignment.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/StructureAlignment.py 2013-12-05 14:10:43.000000000 +0000
@@ -7,6 +7,8 @@
file.
"""
+from __future__ import print_function
+
from Bio.Data import SCOPData
from Bio.PDB import Selection
@@ -43,7 +45,7 @@
aa2=column[sj]
if aa1!="-":
# Position in seq1 is not -
- while 1:
+ while True:
# Loop until an aa is found
r1=rl1[p1]
p1=p1+1
@@ -54,7 +56,7 @@
r1=None
if aa2!="-":
# Position in seq2 is not -
- while 1:
+ while True:
# Loop until an aa is found
r2=rl2[p2]
p2=p2+1
@@ -103,10 +105,10 @@
from Bio.PDB import PDBParser
if len(sys.argv) != 4:
- print "Expects three arguments,"
- print " - FASTA alignment filename (expect two sequences)"
- print " - PDB file one"
- print " - PDB file two"
+ print("Expects three arguments,")
+ print(" - FASTA alignment filename (expect two sequences)")
+ print(" - PDB file one")
+ print(" - PDB file two")
sys.exit()
# The alignment
@@ -128,5 +130,5 @@
al=StructureAlignment(fa, m1, m2)
# Print aligned pairs (r is None if gap)
- for (r1,r2) in al.get_iterator():
- print r1, r2
+ for (r1, r2) in al.get_iterator():
+ print("%s %s" % (r1, r2))
diff -Nru python-biopython-1.62/Bio/PDB/StructureBuilder.py python-biopython-1.63/Bio/PDB/StructureBuilder.py
--- python-biopython-1.62/Bio/PDB/StructureBuilder.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/StructureBuilder.py 2013-12-05 14:10:43.000000000 +0000
@@ -69,7 +69,7 @@
o id - int
o serial_num - int
"""
- self.model=Model(model_id,serial_num)
+ self.model=Model(model_id, serial_num)
self.structure.add(self.model)
def init_chain(self, chain_id):
diff -Nru python-biopython-1.62/Bio/PDB/Superimposer.py python-biopython-1.63/Bio/PDB/Superimposer.py
--- python-biopython-1.62/Bio/PDB/Superimposer.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/Superimposer.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,6 +5,8 @@
"""Superimpose two structures."""
+from __future__ import print_function
+
import numpy
from Bio.SVDSuperimposer import SVDSuperimposer
@@ -78,7 +80,7 @@
sup.set_atoms(fixed, moving)
- print sup.rotran
- print sup.rms
+ print(sup.rotran)
+ print(sup.rms)
sup.apply(moving)
diff -Nru python-biopython-1.62/Bio/PDB/Vector.py python-biopython-1.63/Bio/PDB/Vector.py
--- python-biopython-1.62/Bio/PDB/Vector.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/Vector.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,6 +5,8 @@
"""Vector class, including rotation-related functions."""
+from __future__ import print_function
+
import numpy
@@ -20,33 +22,33 @@
angle=numpy.arccos(t)
if angle<1e-15:
# Angle is 0
- return 0.0, Vector(1,0,0)
+ return 0.0, Vector(1, 0, 0)
elif anglem11 and m00>m22:
x=numpy.sqrt(m00-m11-m22+0.5)
- y=m[0,1]/(2*x)
- z=m[0,2]/(2*x)
+ y=m[0, 1]/(2*x)
+ z=m[0, 2]/(2*x)
elif m11>m00 and m11>m22:
y=numpy.sqrt(m11-m00-m22+0.5)
- x=m[0,1]/(2*y)
- z=m[1,2]/(2*y)
+ x=m[0, 1]/(2*y)
+ z=m[1, 2]/(2*y)
else:
z=numpy.sqrt(m22-m00-m11+0.5)
- x=m[0,2]/(2*z)
- y=m[1,2]/(2*z)
- axis=Vector(x,y,z)
+ x=m[0, 2]/(2*z)
+ y=m[1, 2]/(2*z)
+ axis=Vector(x, y, z)
axis.normalize()
return numpy.pi, axis
@@ -76,7 +78,7 @@
Example:
- >>> m=rotaxis(pi, Vector(1,0,0))
+ >>> m=rotaxis(pi, Vector(1, 0, 0))
>>> rotated_vector=any_vector.left_multiply(m)
@type theta: float
@@ -93,33 +95,34 @@
c=numpy.cos(theta)
s=numpy.sin(theta)
t=1-c
- x,y,z=vector.get_array()
- rot=numpy.zeros((3,3))
+ x, y, z=vector.get_array()
+ rot=numpy.zeros((3, 3))
# 1st row
- rot[0,0]=t*x*x+c
- rot[0,1]=t*x*y-s*z
- rot[0,2]=t*x*z+s*y
+ rot[0, 0]=t*x*x+c
+ rot[0, 1]=t*x*y-s*z
+ rot[0, 2]=t*x*z+s*y
# 2nd row
- rot[1,0]=t*x*y+s*z
- rot[1,1]=t*y*y+c
- rot[1,2]=t*y*z-s*x
+ rot[1, 0]=t*x*y+s*z
+ rot[1, 1]=t*y*y+c
+ rot[1, 2]=t*y*z-s*x
# 3rd row
- rot[2,0]=t*x*z-s*y
- rot[2,1]=t*y*z+s*x
- rot[2,2]=t*z*z+c
+ rot[2, 0]=t*x*z-s*y
+ rot[2, 1]=t*y*z+s*x
+ rot[2, 2]=t*z*z+c
return rot
rotaxis=rotaxis2m
-def refmat(p,q):
+def refmat(p, q):
"""
Return a (left multiplying) matrix that mirrors p onto q.
Example:
- >>> mirror=refmat(p,q)
+ >>> mirror=refmat(p, q)
>>> qq=p.left_multiply(mirror)
- >>> print q, qq # q and qq should be the same
+ >>> print(q)
+ >>> print(qq) # q and qq should be the same
@type p,q: L{Vector}
@return: The mirror operation, a 3x3 Numeric array.
@@ -137,13 +140,14 @@
return ref
-def rotmat(p,q):
+def rotmat(p, q):
"""
Return a (left multiplying) matrix that rotates p onto q.
Example:
- >>> r=rotmat(p,q)
- >>> print q, p.left_multiply(r)
+ >>> r=rotmat(p, q)
+ >>> print(q)
+ >>> print(p.left_multiply(r))
@param p: moving vector
@type p: L{Vector}
@@ -215,8 +219,8 @@
self._ar=numpy.array((x, y, z), 'd')
def __repr__(self):
- x,y,z=self._ar
- return "" % (x,y,z)
+ x, y, z=self._ar
+ return "" % (x, y, z)
def __neg__(self):
"Return Vector(-x, -y, -z)"
@@ -251,12 +255,12 @@
def __pow__(self, other):
"Return VectorxVector (cross product) or Vectorxscalar"
if isinstance(other, Vector):
- a,b,c=self._ar
- d,e,f=other._ar
- c1=numpy.linalg.det(numpy.array(((b,c), (e,f))))
- c2=-numpy.linalg.det(numpy.array(((a,c), (d,f))))
- c3=numpy.linalg.det(numpy.array(((a,b), (d,e))))
- return Vector(c1,c2,c3)
+ a, b, c=self._ar
+ d, e, f=other._ar
+ c1=numpy.linalg.det(numpy.array(((b, c), (e, f))))
+ c2=-numpy.linalg.det(numpy.array(((a, c), (d, f))))
+ c3=numpy.linalg.det(numpy.array(((a, b), (d, e))))
+ return Vector(c1, c2, c3)
else:
a=self._ar*numpy.array(other)
return Vector(a)
@@ -294,8 +298,8 @@
n2=other.norm()
c=(self*other)/(n1*n2)
# Take care of roundoff errors
- c=min(c,1)
- c=max(-1,c)
+ c=min(c, 1)
+ c=max(-1, c)
return numpy.arccos(c)
def get_array(self):
@@ -320,59 +324,59 @@
from numpy.random import random
- v1=Vector(0,0,1)
- v2=Vector(0,0,0)
- v3=Vector(0,1,0)
- v4=Vector(1,1,0)
+ v1=Vector(0, 0, 1)
+ v2=Vector(0, 0, 0)
+ v3=Vector(0, 1, 0)
+ v4=Vector(1, 1, 0)
v4.normalize()
- print v4
+ print(v4)
- print calc_angle(v1, v2, v3)
+ print(calc_angle(v1, v2, v3))
dih=calc_dihedral(v1, v2, v3, v4)
# Test dihedral sign
assert(dih>0)
- print "DIHEDRAL ", dih
+ print("DIHEDRAL %f" % dih)
ref=refmat(v1, v3)
rot=rotmat(v1, v3)
- print v3
- print v1.left_multiply(ref)
- print v1.left_multiply(rot)
- print v1.right_multiply(numpy.transpose(rot))
+ print(v3)
+ print(v1.left_multiply(ref))
+ print(v1.left_multiply(rot))
+ print(v1.right_multiply(numpy.transpose(rot)))
# -
- print v1-v2
- print v1-1
- print v1+(1,2,3)
+ print(v1-v2)
+ print(v1-1)
+ print(v1+(1, 2, 3))
# +
- print v1+v2
- print v1+3
- print v1-(1,2,3)
+ print(v1+v2)
+ print(v1+3)
+ print(v1-(1, 2, 3))
# *
- print v1*v2
+ print(v1*v2)
# /
- print v1/2
- print v1/(1,2,3)
+ print(v1/2)
+ print(v1/(1, 2, 3))
# **
- print v1**v2
- print v1**2
- print v1**(1,2,3)
+ print(v1**v2)
+ print(v1**2)
+ print(v1**(1, 2, 3))
# norm
- print v1.norm()
+ print(v1.norm())
# norm squared
- print v1.normsq()
+ print(v1.normsq())
# setitem
v1[2]=10
- print v1
+ print(v1)
# getitem
- print v1[2]
+ print(v1[2])
- print numpy.array(v1)
+ print(numpy.array(v1))
- print "ROT"
+ print("ROT")
angle=random()*numpy.pi
axis=Vector(random(3)-random(3))
@@ -382,6 +386,7 @@
cangle, caxis=m2rotaxis(m)
- print angle-cangle
- print axis-caxis
- print
+ print(angle-cangle)
+ print(axis-caxis)
+ print("")
+
diff -Nru python-biopython-1.62/Bio/PDB/__init__.py python-biopython-1.63/Bio/PDB/__init__.py
--- python-biopython-1.62/Bio/PDB/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -12,63 +12,63 @@
"""
# Get a Structure object from a PDB file
-from PDBParser import PDBParser
+from .PDBParser import PDBParser
try:
# Get a Structure object from an mmCIF file
- from MMCIFParser import MMCIFParser
+ from .MMCIFParser import MMCIFParser
except:
# Not compiled I guess
pass
# Download from the PDB
-from PDBList import PDBList
+from .PDBList import PDBList
# Parse PDB header directly
-from parse_pdb_header import parse_pdb_header
+from .parse_pdb_header import parse_pdb_header
# Find connected polypeptides in a Structure
-from Polypeptide import PPBuilder, CaPPBuilder, is_aa, standard_aa_names
+from .Polypeptide import PPBuilder, CaPPBuilder, is_aa, standard_aa_names
# This is also useful :-)
from Bio.Data.SCOPData import protein_letters_3to1
# IO of PDB files (including flexible selective output)
-from PDBIO import PDBIO, Select
+from .PDBIO import PDBIO, Select
# Some methods to eg. get a list of Residues
# from a list of Atoms.
-import Selection
+from . import Selection
# Superimpose atom sets
-from Superimposer import Superimposer
+from .Superimposer import Superimposer
# 3D vector class
-from Vector import Vector, calc_angle, calc_dihedral, refmat, rotmat, rotaxis
-from Vector import vector_to_axis, m2rotaxis, rotaxis2m
+from .Vector import Vector, calc_angle, calc_dihedral, refmat, rotmat, rotaxis
+from .Vector import vector_to_axis, m2rotaxis, rotaxis2m
# Alignment module
-from StructureAlignment import StructureAlignment
+from .StructureAlignment import StructureAlignment
# DSSP handle
# (secondary structure and solvent accessible area calculation)
-from DSSP import DSSP, make_dssp_dict
+from .DSSP import DSSP, make_dssp_dict
# Residue depth:
# distance of residue atoms from solvent accessible surface
-from ResidueDepth import ResidueDepth, get_surface
+from .ResidueDepth import ResidueDepth, get_surface
# Calculation of Half Sphere Solvent Exposure
-from HSExposure import HSExposureCA, HSExposureCB, ExposureCN
+from .HSExposure import HSExposureCA, HSExposureCB, ExposureCN
# Kolodny et al.'s backbone libraries
-from FragmentMapper import FragmentMapper
+from .FragmentMapper import FragmentMapper
# Write out chain(start-end) to PDB file
-from Dice import extract
+from .Dice import extract
# Fast atom neighbor search
# Depends on KDTree C++ module
try:
- from NeighborSearch import NeighborSearch
+ from .NeighborSearch import NeighborSearch
except ImportError:
pass
diff -Nru python-biopython-1.62/Bio/PDB/parse_pdb_header.py python-biopython-1.63/Bio/PDB/parse_pdb_header.py
--- python-biopython-1.62/Bio/PDB/parse_pdb_header.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PDB/parse_pdb_header.py 2013-12-05 14:10:43.000000000 +0000
@@ -23,8 +23,8 @@
"""Parse the header of a PDB file."""
-# For 'with' on Python 2.5/Jython 2.5
-from __future__ import with_statement
+from __future__ import print_function
+
import re
from Bio import File
@@ -34,9 +34,9 @@
# JRNL AUTH L.CHEN,M.DOI,F.S.MATHEWS,A.Y.CHISTOSERDOV, 2BBK 7
journal=""
for l in inl:
- if re.search("\AJRNL",l):
+ if re.search("\AJRNL", l):
journal+=l[19:72].lower()
- journal=re.sub("\s\s+"," ",journal)
+ journal=re.sub("\s\s+", " ", journal)
return journal
@@ -46,10 +46,10 @@
references=[]
actref=""
for l in inl:
- if re.search("\AREMARK 1",l):
- if re.search("\AREMARK 1 REFERENCE",l):
+ if re.search("\AREMARK 1", l):
+ if re.search("\AREMARK 1 REFERENCE", l):
if actref!="":
- actref=re.sub("\s\s+"," ",actref)
+ actref=re.sub("\s\s+", " ", actref)
if actref!=" ":
references.append(actref)
actref=""
@@ -57,7 +57,7 @@
actref+=l[19:72].lower()
if actref!="":
- actref=re.sub("\s\s+"," ",actref)
+ actref=re.sub("\s\s+", " ", actref)
if actref!=" ":
references.append(actref)
return references
@@ -73,8 +73,8 @@
else:
century=1900
date=str(century+year)+"-"
- all_months=['xxx','Jan','Feb','Mar','Apr','May','Jun','Jul',
- 'Aug','Sep','Oct','Nov','Dec']
+ all_months=['xxx', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
+ 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
month=str(all_months.index(pdb_date[3:6]))
if len(month)==1:
month = '0'+month
@@ -84,12 +84,12 @@
def _chop_end_codes(line):
"""Chops lines ending with ' 1CSA 14' and the like."""
- return re.sub("\s\s\s\s+[\w]{4}.\s+\d*\Z","",line)
+ return re.sub("\s\s\s\s+[\w]{4}.\s+\d*\Z", "", line)
def _chop_end_misc(line):
"""Chops lines ending with ' 14-JUL-97 1CSA' and the like."""
- return re.sub("\s\s\s\s+.*\Z","",line)
+ return re.sub("\s\s\s\s+.*\Z", "", line)
def _nice_case(line):
@@ -152,12 +152,12 @@
last_src_key="misc"
for hh in header:
- h=re.sub("[\s\n\r]*\Z","",hh) # chop linebreaks off
+ h=re.sub("[\s\n\r]*\Z", "", hh) # chop linebreaks off
#key=re.sub("\s.+\s*","",h)
key = h[:6].strip()
#tail=re.sub("\A\w+\s+\d*\s*","",h)
tail = h[10:].strip()
- # print key+":"+tail
+ # print("%s:%s" % (key, tail)
# From here, all the keys from the header are being parsed
if key=="TITLE":
@@ -167,22 +167,22 @@
else:
dict['name']=name
elif key=="HEADER":
- rr=re.search("\d\d-\w\w\w-\d\d",tail)
+ rr=re.search("\d\d-\w\w\w-\d\d", tail)
if rr is not None:
dict['deposition_date']=_format_date(_nice_case(rr.group()))
head=_chop_end_misc(tail).lower()
dict['head']=head
elif key=="COMPND":
- tt=re.sub("\;\s*\Z","",_chop_end_codes(tail)).lower()
+ tt=re.sub("\;\s*\Z", "", _chop_end_codes(tail)).lower()
# look for E.C. numbers in COMPND lines
- rec = re.search('\d+\.\d+\.\d+\.\d+',tt)
+ rec = re.search('\d+\.\d+\.\d+\.\d+', tt)
if rec:
dict['compound'][comp_molid]['ec_number']=rec.group()
- tt=re.sub("\((e\.c\.)*\d+\.\d+\.\d+\.\d+\)","",tt)
+ tt=re.sub("\((e\.c\.)*\d+\.\d+\.\d+\.\d+\)", "", tt)
tok=tt.split(":")
if len(tok)>=2:
ckey=tok[0]
- cval=re.sub("\A\s*","",tok[1])
+ cval=re.sub("\A\s*", "", tok[1])
if ckey=='mol_id':
dict['compound'][cval]={'misc':''}
comp_molid=cval
@@ -193,12 +193,12 @@
else:
dict['compound'][comp_molid][last_comp_key]+=tok[0]+" "
elif key=="SOURCE":
- tt=re.sub("\;\s*\Z","",_chop_end_codes(tail)).lower()
+ tt=re.sub("\;\s*\Z", "", _chop_end_codes(tail)).lower()
tok=tt.split(":")
- # print tok
+ # print(tok)
if len(tok)>=2:
ckey=tok[0]
- cval=re.sub("\A\s*","",tok[1])
+ cval=re.sub("\A\s*", "", tok[1])
if ckey=='mol_id':
dict['source'][cval]={'misc':''}
comp_molid=cval
@@ -217,7 +217,7 @@
elif key=="EXPDTA":
expd=_chop_end_codes(tail)
# chop junk at end of lines for some structures
- expd=re.sub('\s\s\s\s\s\s\s.*\Z','',expd)
+ expd=re.sub('\s\s\s\s\s\s\s.*\Z', '', expd)
# if re.search('\Anmr',expd,re.IGNORECASE): expd='nmr'
# if re.search('x-ray diffraction',expd,re.IGNORECASE): expd='x-ray diffraction'
dict['structure_method']=expd.lower()
@@ -225,11 +225,11 @@
# make Annotation entries out of these!!!
pass
elif key=="REVDAT":
- rr=re.search("\d\d-\w\w\w-\d\d",tail)
+ rr=re.search("\d\d-\w\w\w-\d\d", tail)
if rr is not None:
dict['release_date']=_format_date(_nice_case(rr.group()))
elif key=="JRNL":
- # print key,tail
+ # print("%s:%s" % (key, tail))
if 'journal' in dict:
dict['journal']+=tail
else:
@@ -241,16 +241,16 @@
else:
dict['author']=auth
elif key=="REMARK":
- if re.search("REMARK 2 RESOLUTION.",hh):
- r=_chop_end_codes(re.sub("REMARK 2 RESOLUTION.",'',hh))
- r=re.sub("\s+ANGSTROM.*","",r)
+ if re.search("REMARK 2 RESOLUTION.", hh):
+ r=_chop_end_codes(re.sub("REMARK 2 RESOLUTION.", '', hh))
+ r=re.sub("\s+ANGSTROM.*", "", r)
try:
dict['resolution']=float(r)
except:
- #print 'nonstandard resolution',r
+ #print('nonstandard resolution %r' % r)
dict['resolution']=None
else:
- # print key
+ # print(key)
pass
if dict['structure_method']=='unknown':
if dict['resolution']>0.0:
@@ -262,12 +262,11 @@
# some data and returns it as a dictionary.
import sys
filename = sys.argv[1]
- handle = open(filename,'r')
- data_dict = parse_pdb_header(handle)
- handle.close()
+ with open(filename, 'r') as handle:
+ data_dict = parse_pdb_header(handle)
# print the dictionary
- for k, y in data_dict.iteritems():
- print "-"*40
- print k
- print y
+ for k, y in data_dict.items():
+ print("-"*40)
+ print(k)
+ print(y)
diff -Nru python-biopython-1.62/Bio/ParserSupport.py python-biopython-1.63/Bio/ParserSupport.py
--- python-biopython-1.62/Bio/ParserSupport.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/ParserSupport.py 2013-12-05 14:10:43.000000000 +0000
@@ -3,16 +3,14 @@
# license. Please see the LICENSE file that should have been included
# as part of this package.
-"""Code to support writing parsers (OBSOLETE).
-
-
+"""Code to support writing parsers (DEPRECATED).
Classes:
AbstractParser Base class for parsers.
AbstractConsumer Base class of all Consumers.
TaggingConsumer Consumer that tags output with its event. For debugging
EventGenerator Generate Biopython Events from Martel XML output
- (note that Martel is now DEPRECATED)
+ (note that Martel has been removed)
Functions:
safe_readline Read a line from a handle, with check for EOF.
@@ -25,10 +23,10 @@
"""
-
+from Bio import BiopythonDeprecationWarning
import warnings
-warnings.warn("The module Bio.ParserSupport is now obsolete, and will be deprecated and removed in a future release of Biopython.", PendingDeprecationWarning)
-
+warnings.warn("Bio.ParserSupport is now deprecated will be removed in a "
+ "future release of Biopython.", BiopythonDeprecationWarning)
import sys
try:
@@ -37,7 +35,8 @@
#Python 3, see http://bugs.python.org/issue8206
InstanceType = object
from types import MethodType
-import StringIO
+
+from Bio._py3k import StringIO
from Bio import File
@@ -59,14 +58,11 @@
raise NotImplementedError("Please implement in a derived class")
def parse_str(self, string):
- return self.parse(StringIO.StringIO(string))
+ return self.parse(StringIO(string))
def parse_file(self, filename):
- h = open(filename)
- try:
+ with open(filename) as h:
retval = self.parse(h)
- finally:
- h.close()
return retval
@@ -298,7 +294,7 @@
"""
nlines = 0
- while 1:
+ while True:
line = safe_readline(uhandle)
# If I've failed the condition, then stop reading the line.
if _fails_conditions(*(line,), **keywds):
@@ -320,7 +316,7 @@
"""
nlines = 0
- while 1:
+ while True:
line = safe_readline(uhandle)
# If I've met the condition, then stop reading the line.
if not _fails_conditions(*(line,), **keywds):
diff -Nru python-biopython-1.62/Bio/Pathway/Rep/Graph.py python-biopython-1.63/Bio/Pathway/Rep/Graph.py
--- python-biopython-1.62/Bio/Pathway/Rep/Graph.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Pathway/Rep/Graph.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,6 +5,8 @@
# get set abstraction for graph representation
+from functools import reduce
+
class Graph(object):
"""A directed graph abstraction with labeled edges."""
@@ -29,23 +31,20 @@
return not self.__eq__(g)
def __repr__(self):
- """Returns an unique string representation of this graph."""
+ """Returns a unique string representation of this graph."""
s = ""
def __str__(self):
"""Returns a concise string description of this graph."""
- nodenum = len(self._adjacency_list.keys())
- edgenum = reduce(lambda x,y: x+y,
- map(len, self._adjacency_list.values()))
- labelnum = len(self._label_map.keys())
+ nodenum = len(self._adjacency_list)
+ edgenum = reduce(lambda x, y: x+y,
+ [len(v) for v in self._adjacency_list.values()])
+ labelnum = len(self._label_map)
return " node: " + str(source))
if to not in self._adjacency_list:
raise ValueError("Unknown node: " + str(to))
- if (source,to) in self._edge_map:
+ if (source, to) in self._edge_map:
raise ValueError(str(source) + " -> " + str(to) + " exists")
self._adjacency_list[source].add(to)
if label not in self._label_map:
self._label_map[label] = set()
- self._label_map[label].add((source,to))
- self._edge_map[(source,to)] = label
+ self._label_map[label].add((source, to))
+ self._edge_map[(source, to)] = label
def child_edges(self, parent):
"""Returns a list of (child, label) pairs for parent."""
if parent not in self._adjacency_list:
raise ValueError("Unknown node: " + str(parent))
- return [(x, self._edge_map[(parent,x)])
+ return [(x, self._edge_map[(parent, x)])
for x in sorted(self._adjacency_list[parent])]
def children(self, parent):
@@ -89,18 +88,18 @@
def labels(self):
"""Returns a list of all the edge labels in this graph."""
- return self._label_map.keys()
+ return list(self._label_map.keys())
def nodes(self):
"""Returns a list of the nodes in this graph."""
- return self._adjacency_list.keys()
+ return list(self._adjacency_list.keys())
def parent_edges(self, child):
"""Returns a list of (parent, label) pairs for child."""
if child not in self._adjacency_list:
raise ValueError("Unknown node: " + str(child))
parents = []
- for parent, children in self._adjacency_list.iteritems():
+ for parent, children in self._adjacency_list.items():
for x in children:
if x is child:
parents.append((parent, self._edge_map[(parent, child)]))
@@ -108,7 +107,7 @@
def parents(self, child):
"""Returns a list of unique parents for child."""
- return sorted(set([x[0] for x in self.parent_edges(child)]))
+ return sorted(set(x[0] for x in self.parent_edges(child)))
def remove_node(self, node):
"""Removes node and all edges connected to it."""
@@ -121,7 +120,7 @@
self._adjacency_list[n] = set(x for x in self._adjacency_list[n]
if x is not node)
# remove all refering pairs in label map
- for label in self._label_map.keys():
+ for label in list(self._label_map.keys()): # we're editing this!
lm = set(x for x in self._label_map[label]
if (x[0] is not node) and (x[1] is not node))
# remove the entry completely if the label is now unused
@@ -130,7 +129,7 @@
else:
del self._label_map[label]
# remove all refering entries in edge map
- for edge in self._edge_map.keys():
+ for edge in list(self._edge_map.keys()): # we're editing this!
if edge[0] is node or edge[1] is node:
del self._edge_map[edge]
diff -Nru python-biopython-1.62/Bio/Pathway/Rep/MultiGraph.py python-biopython-1.63/Bio/Pathway/Rep/MultiGraph.py
--- python-biopython-1.62/Bio/Pathway/Rep/MultiGraph.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Pathway/Rep/MultiGraph.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,6 +5,8 @@
# get set abstraction for graph representation
+from functools import reduce
+
#TODO - Subclass graph?
class MultiGraph(object):
@@ -28,19 +30,18 @@
return not self.__eq__(g)
def __repr__(self):
- """Returns an unique string representation of this graph."""
+ """Returns a unique string representation of this graph."""
s = ""
def __str__(self):
"""Returns a concise string description of this graph."""
nodenum = len(self._adjacency_list)
- edgenum = reduce(lambda x,y: x+y,
- map(len, self._adjacency_list.values()))
+ edgenum = reduce(lambda x, y: x+y,
+ [len(v) for v in self._adjacency_list.values()])
labelnum = len(self._label_map)
return " node: " + str(child))
parents = []
- for parent, children in self._adjacency_list.iteritems():
+ for parent, children in self._adjacency_list.items():
for x in children:
if x[0] is child:
parents.append((parent, x[1]))
@@ -101,7 +102,7 @@
def parents(self, child):
"""Returns a list of unique parents for child."""
- return sorted(set([x[0] for x in self.parent_edges(child)]))
+ return sorted(set(x[0] for x in self.parent_edges(child)))
def remove_node(self, node):
"""Removes node and all edges connected to it."""
@@ -114,7 +115,7 @@
self._adjacency_list[n] = set(x for x in self._adjacency_list[n]
if x[0] is not node)
# remove all refering pairs in label map
- for label in self._label_map.keys():
+ for label in list(self._label_map.keys()): # we're editing this!
lm = set(x for x in self._label_map[label]
if (x[0] is not node) and (x[1] is not node))
# remove the entry completely if the label is now unused
diff -Nru python-biopython-1.62/Bio/Pathway/__init__.py python-biopython-1.63/Bio/Pathway/__init__.py
--- python-biopython-1.62/Bio/Pathway/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Pathway/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -30,6 +30,8 @@
Comments and feature requests are most welcome.
"""
+from functools import reduce
+
from Bio.Pathway.Rep.MultiGraph import *
@@ -72,7 +74,7 @@
# enforce invariants on reactants:
self.reactants = reactants.copy()
# loop over original, edit the copy
- for r, value in reactants.iteritems():
+ for r, value in reactants.items():
if value == 0:
del self.reactants[r]
self.catalysts = sorted(set(catalysts))
@@ -99,7 +101,7 @@
def __repr__(self):
"""Returns a debugging string representation of self."""
return "Reaction(" + \
- ",".join(map(repr,[self.reactants,
+ ",".join(map(repr, [self.reactants,
self.catalysts,
self.data,
self.reversible])) + ")"
@@ -142,7 +144,7 @@
def species(self):
"""Returns a list of all Species involved in self."""
- return self.reactants.keys()
+ return list(self.reactants.keys())
class System(object):
@@ -162,7 +164,7 @@
def __repr__(self):
"""Returns a debugging string representation of self."""
- return "System(" + ",".join(map(repr,self.__reactions)) + ")"
+ return "System(" + ",".join(map(repr, self.__reactions)) + ")"
def __str__(self):
"""Returns a string representation of self."""
@@ -188,7 +190,7 @@
def species(self):
"""Returns a list of the species in this system."""
- return sorted(set(reduce(lambda s,x: s + x,
+ return sorted(set(reduce(lambda s, x: s + x,
[x.species() for x in self.reactions()], [])))
def stochiometry(self):
diff -Nru python-biopython-1.62/Bio/Phylo/Applications/_Fasttree.py python-biopython-1.63/Bio/Phylo/Applications/_Fasttree.py
--- python-biopython-1.62/Bio/Phylo/Applications/_Fasttree.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Phylo/Applications/_Fasttree.py 2013-12-05 14:10:43.000000000 +0000
@@ -3,6 +3,8 @@
# Please see the LICENSE file that should have been included as part of this
# package.
"""Command-line wrapper for tree inference program Fasttree."""
+from __future__ import print_function
+
__docformat__ = "restructuredtext en"
from Bio.Application import _Option, _Switch, _Argument, AbstractCommandline
@@ -38,10 +40,10 @@
>>> import _Fasttree
>>> fasttree_exe = r"C:\FasttreeWin32\fasttree.exe"
>>> cmd = _Fasttree.FastTreeCommandline(fasttree_exe, input=r'C:\Input\ExampleAlignment.fsa', out='C:\Output\ExampleTree.tree')
- >>> print cmd
+ >>> print(cmd)
>>> out, err = cmd()
- >>> print out
- >>> print err
+ >>> print(out)
+ >>> print(err)
Usage advice:
the only parameters needed are (fasttree_exe, input='' out='')
@@ -500,3 +502,4 @@
]
AbstractCommandline.__init__(self, cmd, **kwargs)
+
diff -Nru python-biopython-1.62/Bio/Phylo/Applications/_Phyml.py python-biopython-1.63/Bio/Phylo/Applications/_Phyml.py
--- python-biopython-1.62/Bio/Phylo/Applications/_Phyml.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Phylo/Applications/_Phyml.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,6 +5,8 @@
"""Command-line wrapper for the tree inference program PhyML."""
__docformat__ = "restructuredtext en"
+from Bio._py3k import basestring
+
from Bio.Application import _Option, _Switch, AbstractCommandline
diff -Nru python-biopython-1.62/Bio/Phylo/Applications/_Raxml.py python-biopython-1.63/Bio/Phylo/Applications/_Raxml.py
--- python-biopython-1.62/Bio/Phylo/Applications/_Raxml.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Phylo/Applications/_Raxml.py 2013-12-05 14:10:43.000000000 +0000
@@ -7,6 +7,9 @@
Derived from the help page for RAxML version 7.3 by Alexandros Stamatakis, but
should work for any version 7.X (and probably earlier for most options).
"""
+from __future__ import print_function
+from Bio._py3k import basestring
+
__docformat__ = "restructuredtext en"
from Bio.Application import _Option, _Switch, AbstractCommandline
@@ -24,7 +27,7 @@
>>> from Bio.Phylo.Applications import RaxmlCommandline
>>> raxml_cline = RaxmlCommandline(sequences="Tests/Phylip/interlaced2.phy",
... model="PROTCATWAG", name="interlaced2")
- >>> print raxml_cline
+ >>> print(raxml_cline)
raxmlHPC -m PROTCATWAG -n interlaced2 -p 10000 -s Tests/Phylip/interlaced2.phy
You would typically run the command line with raxml_cline() or via
@@ -368,3 +371,4 @@
# ENH: enforce -s, -n and -m
if not self.parsimony_seed:
self.parsimony_seed = 10000
+
diff -Nru python-biopython-1.62/Bio/Phylo/Applications/__init__.py python-biopython-1.63/Bio/Phylo/Applications/__init__.py
--- python-biopython-1.62/Bio/Phylo/Applications/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Phylo/Applications/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -5,9 +5,9 @@
"""Phylogenetics command line tool wrappers."""
__docformat__ = "restructuredtext en"
-from _Phyml import PhymlCommandline
-from _Raxml import RaxmlCommandline
-from _Fasttree import FastTreeCommandline
+from ._Phyml import PhymlCommandline
+from ._Raxml import RaxmlCommandline
+from ._Fasttree import FastTreeCommandline
#Make this explicit, then they show up in the API docs
__all__ = ["PhymlCommandline",
diff -Nru python-biopython-1.62/Bio/Phylo/BaseTree.py python-biopython-1.63/Bio/Phylo/BaseTree.py
--- python-biopython-1.62/Bio/Phylo/BaseTree.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Phylo/BaseTree.py 2013-12-05 14:10:43.000000000 +0000
@@ -10,6 +10,11 @@
"""
__docformat__ = "restructuredtext en"
+from Bio._py3k import zip
+from Bio._py3k import filter
+from Bio._py3k import basestring
+from Bio._py3k import unicode
+
import collections
import copy
import itertools
@@ -57,7 +62,7 @@
singles = []
lists = []
# Sort attributes for consistent results
- for attrname, child in sorted(elem.__dict__.iteritems(),
+ for attrname, child in sorted(elem.__dict__.items(),
key=lambda kv: kv[0]):
if child is None:
continue
@@ -115,7 +120,7 @@
return False
else:
kwa_copy = kwargs
- for key, pattern in kwa_copy.iteritems():
+ for key, pattern in kwa_copy.items():
# Nodes must match all other specified attributes
if not hasattr(node, key):
return False
@@ -230,7 +235,7 @@
return "%s=%s" % (key, val)
return u'%s(%s)' % (self.__class__.__name__,
', '.join(pair_as_kwarg_string(key, val)
- for key, val in self.__dict__.iteritems()
+ for key, val in self.__dict__.items()
if val is not None and
type(val) in (str, int, float, bool, unicode)
))
@@ -260,14 +265,14 @@
order_func = order_opts[order]
except KeyError:
raise ValueError("Invalid order '%s'; must be one of: %s"
- % (order, tuple(order_opts.keys())))
+ % (order, tuple(order_opts)))
if follow_attrs:
get_children = _sorted_attrs
root = self
else:
get_children = lambda elem: elem.clades
root = self.root
- return itertools.ifilter(filter_func, order_func(root, get_children))
+ return filter(filter_func, order_func(root, get_children))
def find_any(self, *args, **kwargs):
"""Return the first element found by find_elements(), or None.
@@ -277,7 +282,7 @@
"""
hits = self.find_elements(*args, **kwargs)
try:
- return hits.next()
+ return next(hits)
except StopIteration:
return None
@@ -320,7 +325,7 @@
>>> from Bio.Phylo.IO import PhyloXMIO
>>> phx = PhyloXMLIO.read('phyloxml_examples.xml')
>>> matches = phx.phylogenies[5].find_elements(code='OCTVU')
- >>> matches.next()
+ >>> next(matches)
Taxonomy(code='OCTVU', scientific_name='Octopus vulgaris')
"""
@@ -413,7 +418,7 @@
if p is None:
raise ValueError("target %s is not in this tree" % repr(t))
mrca = self.root
- for level in itertools.izip(*paths):
+ for level in zip(*paths):
ref = level[0]
for other in level[1:]:
if ref is not other:
@@ -871,7 +876,7 @@
tips = self.get_terminals()
for tip in tips:
self.root_with_outgroup(tip)
- new_max = max(self.depths().iteritems(), key=lambda nd: nd[1])
+ new_max = max(self.depths().items(), key=lambda nd: nd[1])
if new_max[1] > max_distance:
tip1 = tip
tip2 = new_max[0]
@@ -912,7 +917,7 @@
as an output file format.
"""
if format_spec:
- from StringIO import StringIO
+ from Bio._py3k import StringIO
from Bio.Phylo import _io
handle = StringIO()
_io.write([self], handle, format_spec)
@@ -1013,14 +1018,17 @@
"""Number of clades directy under the root."""
return len(self.clades)
- def __nonzero__(self):
- """Boolean value of an instance of this class.
+ #Python 3:
+ def __bool__(self):
+ """Boolean value of an instance of this class (True).
NB: If this method is not defined, but ``__len__`` is, then the object
is considered true if the result of ``__len__()`` is nonzero. We want
Clade instances to always be considered True.
"""
return True
+ #Python 2:
+ __nonzero__ = __bool__
def __str__(self):
if self.name:
@@ -1122,11 +1130,8 @@
len(hexstr) == 7
), "need a 24-bit hexadecimal string, e.g. #000000"
- def unpack(cc):
- return int('0x'+cc, base=16)
-
RGB = hexstr[1:3], hexstr[3:5], hexstr[5:]
- return cls(*map(unpack, RGB))
+ return cls(*[int('0x'+cc, base=16) for cc in RGB])
@classmethod
def from_name(cls, colorname):
diff -Nru python-biopython-1.62/Bio/Phylo/CDAOIO.py python-biopython-1.63/Bio/Phylo/CDAOIO.py
--- python-biopython-1.62/Bio/Phylo/CDAOIO.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Phylo/CDAOIO.py 2013-12-05 14:10:43.000000000 +0000
@@ -21,19 +21,21 @@
__docformat__ = "restructuredtext en"
-from cStringIO import StringIO
+from Bio._py3k import StringIO
from Bio.Phylo import CDAO
-from _cdao_owl import cdao_elements, cdao_namespaces, resolve_uri
+from ._cdao_owl import cdao_elements, cdao_namespaces, resolve_uri
import os
-import urlparse
class CDAOError(Exception):
"""Exception raised when CDAO object construction cannot continue."""
pass
-try:
+try:
import rdflib
+ rdfver = rdflib.__version__
+ if rdfver[0] in ["1", "2"] or (rdfver in ["3.0.0", "3.1.0", "3.2.0"]):
+ raise CDAOError('Support for CDAO tree format requires RDFlib v3.2.1 or later.')
except ImportError:
raise CDAOError('Support for CDAO tree format requires RDFlib.')
@@ -48,7 +50,7 @@
def qUri(x):
return resolve_uri(x, namespaces=RDF_NAMESPACES)
-
+
def format_label(x):
return x.replace('_', ' ')
@@ -94,50 +96,50 @@
"""Parse the text stream this object was initialized with."""
self.parse_handle_to_graph(**kwargs)
return self.parse_graph()
-
+
def parse_handle_to_graph(self, rooted=False,
parse_format='turtle', context=None, **kwargs):
'''Parse self.handle into RDF model self.model.'''
-
+
if self.graph is None:
self.graph = rdflib.Graph()
graph = self.graph
-
+
for k, v in RDF_NAMESPACES.items():
graph.bind(k, v)
-
+
self.rooted = rooted
-
+
if 'base_uri' in kwargs:
base_uri = kwargs['base_uri']
else:
base_uri = "file://"+os.path.abspath(self.handle.name)
-
+
graph.parse(file=self.handle, publicID=base_uri, format=parse_format)
-
+
return self.parse_graph(graph, context=context)
-
-
+
+
def parse_graph(self, graph=None, context=None):
'''Generator that yields CDAO.Tree instances from an RDF model.'''
-
+
if graph is None:
graph = self.graph
-
+
# look up branch lengths/TUs for all nodes
self.get_node_info(graph, context=context)
-
+
for root_node in self.tree_roots:
clade = self.parse_children(root_node)
-
+
yield CDAO.Tree(root=clade, rooted=self.rooted)
-
-
+
+
def new_clade(self, node):
'''Returns a CDAO.Clade object for a given named node.'''
-
+
result = self.node_info[node]
-
+
kwargs = {}
if 'branch_length' in result:
kwargs['branch_length'] = result['branch_length']
@@ -145,21 +147,21 @@
kwargs['name'] = result['label'].replace('_', ' ')
if 'confidence' in result:
kwargs['confidence'] = result['confidence']
-
+
clade = CDAO.Clade(**kwargs)
-
+
return clade
-
-
+
+
def get_node_info(self, graph, context=None):
'''Creates a dictionary containing information about all nodes in the tree.'''
-
+
self.node_info = {}
self.obj_info = {}
self.children = {}
self.nodes = set()
self.tree_roots = set()
-
+
assignments = {
qUri('cdao:has_Parent'): 'parent',
qUri('cdao:belongs_to_Edge_as_Child'): 'edge',
@@ -169,21 +171,21 @@
qUri('rdfs:label'): 'label',
qUri('cdao:has_Support_Value'): 'confidence',
}
-
+
for s, v, o in graph:
# process each RDF triple in the graph sequentially
-
+
s, v, o = str(s), str(v), str(o)
-
+
if not s in self.obj_info: self.obj_info[s] = {}
this = self.obj_info[s]
-
+
try:
# if the predicate is one we care about, store information for later
this[assignments[v]] = o
except KeyError:
pass
-
+
if v == qUri('rdf:type'):
if o in (qUri('cdao:AncestralNode'), qUri('cdao:TerminalNode')):
# this is a tree node; store it in set of all nodes
@@ -191,12 +193,12 @@
if v == qUri('cdao:has_Root'):
# this is a tree; store its root in set of all tree roots
self.tree_roots.add(o)
-
+
for node in self.nodes:
# for each node, look up all information needed to create a CDAO.Clade
self.node_info[node] = {}
node_info = self.node_info[node]
-
+
obj = self.obj_info[node]
if 'edge' in obj:
# if this object points to an edge, we need a branch length from
@@ -212,7 +214,7 @@
tu = self.obj_info[obj['tu']]
if 'label' in tu:
node_info['label'] = tu['label']
-
+
if 'parent' in obj:
# store this node as a child of its parent, if it has one,
# so that the tree can be traversed from parent to children
@@ -220,18 +222,18 @@
if not parent in self.children:
self.children[parent] = []
self.children[parent].append(node)
-
-
+
+
def parse_children(self, node):
- '''Return a CDAO.Clade, and calls itself recursively for each child,
- traversing the entire tree and creating a nested structure of CDAO.Clade
+ '''Return a CDAO.Clade, and calls itself recursively for each child,
+ traversing the entire tree and creating a nested structure of CDAO.Clade
objects.'''
-
+
clade = self.new_clade(node)
-
+
children = self.children[node] if node in self.children else []
clade.clades = [self.parse_children(child_node) for child_node in children]
-
+
return clade
@@ -244,30 +246,30 @@
def __init__(self, trees):
self.trees = trees
-
+
self.node_counter = 0
self.edge_counter = 0
self.tu_counter = 0
self.tree_counter = 0
- def write(self, handle, tree_uri='', record_complete_ancestry=False,
+ def write(self, handle, tree_uri='', record_complete_ancestry=False,
rooted=False, **kwargs):
"""Write this instance's trees to a file handle."""
-
+
self.rooted = rooted
self.record_complete_ancestry = record_complete_ancestry
-
+
if tree_uri and not tree_uri.endswith('/'): tree_uri += '/'
-
+
trees = self.trees
-
+
if tree_uri: handle.write('@base <%s>\n' % tree_uri)
for k, v in self.prefixes.items():
- handle.write('@prefix %s: <%s> .\n' % (k,v))
+ handle.write('@prefix %s: <%s> .\n' % (k, v))
handle.write('<%s> a owl:Ontology .\n' % self.prefixes['cdao'])
-
-
+
+
for tree in trees:
self.tree_counter += 1
self.tree_uri = 'tree%s'
@@ -276,8 +278,8 @@
statements = self.process_clade(first_clade, root=tree)
for stmt in statements:
self.add_stmt_to_handle(handle, stmt)
-
-
+
+
def add_stmt_to_handle(self, handle, stmt):
# apply URI prefixes
stmt_strings = []
@@ -295,41 +297,41 @@
elif isinstance(part, rdflib.Literal):
stmt_strings.append(part.n3())
-
+
else:
stmt_strings.append(str(part))
-
+
handle.write('%s .\n' % ' '.join(stmt_strings))
-
+
def process_clade(self, clade, parent=None, root=False):
'''recursively generate triples describing a tree of clades'''
-
+
self.node_counter += 1
clade.uri = 'node%s' % str(self.node_counter).zfill(ZEROES)
if parent: clade.ancestors = parent.ancestors + [parent.uri]
else: clade.ancestors = []
-
+
nUri = lambda s: rdflib.URIRef(s)#':%s' % s
pUri = lambda s: rdflib.URIRef(qUri(s))
tree_id = nUri('')
-
+
statements = []
-
+
if not root is False:
# create a cdao:RootedTree with reference to the tree root
tree_type = pUri('cdao:RootedTree') if self.rooted else pUri('cdao:UnrootedTree')
-
+
statements += [
(tree_id, pUri('rdf:type'), tree_type),
(tree_id, pUri('cdao:has_Root'), nUri(clade.uri)),
]
-
+
try: tree_attributes = root.attributes
except AttributeError: tree_attributes = []
-
+
for predicate, obj in tree_attributes:
statements.append((tree_id, predicate, obj))
-
+
if clade.name:
# create TU
self.tu_counter += 1
@@ -340,20 +342,20 @@
(nUri(clade.uri), pUri('cdao:represents_TU'), nUri(tu_uri)),
(nUri(tu_uri), pUri('rdfs:label'), rdflib.Literal(format_label(clade.name))),
]
-
+
try: tu_attributes = clade.tu_attributes
except AttributeError: tu_attributes = []
-
+
for predicate, obj in tu_attributes:
yield (nUri(tu_uri), predicate, obj)
-
+
# create this node
node_type = 'cdao:TerminalNode' if clade.is_terminal() else 'cdao:AncestralNode'
statements += [
(nUri(clade.uri), pUri('rdf:type'), pUri(node_type)),
(nUri(clade.uri), pUri('cdao:belongs_to_Tree'), tree_id),
]
-
+
if not parent is None:
# create edge from the parent node to this node
self.edge_counter += 1
@@ -368,43 +370,43 @@
(nUri(clade.uri), pUri('cdao:has_Parent'), nUri(parent.uri)),
(nUri(parent.uri), pUri('cdao:belongs_to_Edge_as_Parent'), nUri(edge_uri)),
]
-
+
if hasattr(clade, 'confidence') and not clade.confidence is None:
confidence = rdflib.Literal(clade.confidence, datatype='http://www.w3.org/2001/XMLSchema#decimal')
-
+
statements += [(nUri(clade.uri), pUri('cdao:has_Support_Value'), confidence)]
-
-
+
+
if self.record_complete_ancestry and len(clade.ancestors) > 0:
statements += [(nUri(clade.uri), pUri('cdao:has_Ancestor'), nUri(ancestor))
for ancestor in clade.ancestors]
-
+
if not clade.branch_length is None:
# add branch length
edge_ann_uri = 'edge_annotation%s' % str(self.edge_counter).zfill(ZEROES)
-
+
branch_length = rdflib.Literal(clade.branch_length, datatype=rdflib.URIRef('http://www.w3.org/2001/XMLSchema#decimal'))
statements += [
(nUri(edge_ann_uri), pUri('rdf:type'), pUri('cdao:EdgeLength')),
(nUri(edge_uri), pUri('cdao:has_Annotation'), nUri(edge_ann_uri)),
(nUri(edge_ann_uri), pUri('cdao:has_Value'), branch_length),
]
-
+
try: edge_attributes = clade.edge_attributes
except AttributeError: edge_attributes = []
-
+
for predicate, obj in edge_attributes:
yield (nUri(edge_uri), predicate, obj)
-
+
for stmt in statements:
yield stmt
-
+
try: clade_attributes = clade.attributes
except AttributeError: clade_attributes = []
-
+
for predicate, obj in clade_attributes:
yield (nUri(clade.uri), predicate, obj)
-
+
if not clade.is_terminal():
for new_clade in clade.clades:
for stmt in self.process_clade(new_clade, parent=clade, root=False):
diff -Nru python-biopython-1.62/Bio/Phylo/NeXMLIO.py python-biopython-1.63/Bio/Phylo/NeXMLIO.py
--- python-biopython-1.62/Bio/Phylo/NeXMLIO.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Phylo/NeXMLIO.py 2013-12-05 14:10:43.000000000 +0000
@@ -12,12 +12,12 @@
"""
__docformat__ = "restructuredtext en"
-from cStringIO import StringIO
+from Bio._py3k import StringIO
from Bio.Phylo import NeXML
from xml.dom import minidom
import sys
-from _cdao_owl import cdao_elements, cdao_namespaces, resolve_uri
+from ._cdao_owl import cdao_elements, cdao_namespaces, resolve_uri
#For speed try to use cElementTree rather than ElementTree
@@ -54,7 +54,7 @@
def register_namespace(prefix, uri):
ElementTree._namespace_map[uri] = prefix
-for prefix, uri in NAMESPACES.iteritems():
+for prefix, uri in NAMESPACES.items():
register_namespace(prefix, uri)
@@ -180,14 +180,16 @@
# if no root specified, start the recursive tree creation function
# with the first node that's not a child of any other nodes
rooted = False
- possible_roots = (node.attrib['id'] for node in nodes if node.attrib['id'] in srcs and not node.attrib['id'] in tars)
- root = possible_roots.next()
+ possible_roots = (node.attrib['id'] for node in nodes
+ if node.attrib['id'] in srcs
+ and not node.attrib['id'] in tars)
+ root = next(possible_roots)
else:
rooted = True
yield NeXML.Tree(root=self._make_tree(root, node_dict, node_children), rooted=rooted)
-
-
+
+
@classmethod
def _make_tree(cls, node, node_dict, children):
'''Return a NeXML.Clade, and calls itself recursively for each child,
@@ -290,15 +292,15 @@
if not parent is None:
edge_id = self.new_label('edge')
attrib={
- 'id':edge_id, 'source':parent.node_id, 'target':node_id,
- 'length':str(clade.branch_length),
- 'typeof':convert_uri('cdao:Edge'),
+ 'id': edge_id, 'source': parent.node_id, 'target': node_id,
+ 'length': str(clade.branch_length),
+ 'typeof': convert_uri('cdao:Edge'),
}
if hasattr(clade, 'confidence') and not clade.confidence is None:
attrib.update({
- 'property':convert_uri('cdao:has_Support_Value'),
- 'datatype':'xsd:float',
- 'content':'%1.2f' % clade.confidence,
+ 'property': convert_uri('cdao:has_Support_Value'),
+ 'datatype': 'xsd:float',
+ 'content': '%1.2f' % clade.confidence,
})
node = ElementTree.SubElement(tree, 'edge', **attrib)
diff -Nru python-biopython-1.62/Bio/Phylo/NewickIO.py python-biopython-1.63/Bio/Phylo/NewickIO.py
--- python-biopython-1.62/Bio/Phylo/NewickIO.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Phylo/NewickIO.py 2013-12-05 14:10:43.000000000 +0000
@@ -12,7 +12,7 @@
__docformat__ = "restructuredtext en"
import re
-from cStringIO import StringIO
+from Bio._py3k import StringIO
from Bio.Phylo import Newick
@@ -33,7 +33,7 @@
(r"\;", 'semicolon'),
(r"\n", 'newline'),
]
-tokenizer = re.compile('(%s)' % '|'.join([token[0] for token in tokens]))
+tokenizer = re.compile('(%s)' % '|'.join(token[0] for token in tokens))
token_dict = dict((name, re.compile(token)) for (token, name) in tokens)
@@ -186,7 +186,7 @@
# if ; token broke out of for loop, there should be no remaining tokens
try:
- next_token = tokens.next()
+ next_token = next(tokens)
raise NewickError('Text after semicolon in Newick tree: %s'
% next_token.group())
except StopIteration:
diff -Nru python-biopython-1.62/Bio/Phylo/NexusIO.py python-biopython-1.63/Bio/Phylo/NexusIO.py
--- python-biopython-1.62/Bio/Phylo/NexusIO.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Phylo/NexusIO.py 2013-12-05 14:10:43.000000000 +0000
@@ -68,7 +68,7 @@
for idx, nwk in enumerate(
writer.to_strings(plain=False, plain_newick=True,
**kwargs))]
- tax_labels = map(str, chain(*(t.get_terminals() for t in trees)))
+ tax_labels = [str(x) for x in chain(*(t.get_terminals() for t in trees))]
text = NEX_TEMPLATE % {
'count': len(tax_labels),
'labels': ' '.join(tax_labels),
diff -Nru python-biopython-1.62/Bio/Phylo/PAML/_paml.py python-biopython-1.63/Bio/Phylo/PAML/_paml.py
--- python-biopython-1.62/Bio/Phylo/PAML/_paml.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Phylo/PAML/_paml.py 2013-12-05 14:10:43.000000000 +0000
@@ -3,6 +3,8 @@
# license. Please see the LICENSE file that should have been included
# as part of this package.
+from __future__ import print_function
+
import os
import subprocess
@@ -66,7 +68,7 @@
def print_options(self):
"""Print out all of the options and their current settings."""
for option in self._options.items():
- print "%s = %s" % (option[0], option[1])
+ print("%s = %s" % (option[0], option[1]))
def set_options(self, **kwargs):
"""Set the value of an option.
@@ -89,7 +91,7 @@
def get_all_options(self):
"""Return the values of all the options."""
- return self._options.items()
+ return list(self._options.items())
def _set_rel_paths(self):
"""Convert all file/directory locations to paths relative to the current working directory.
diff -Nru python-biopython-1.62/Bio/Phylo/PAML/_parse_codeml.py python-biopython-1.63/Bio/Phylo/PAML/_parse_codeml.py
--- python-biopython-1.62/Bio/Phylo/PAML/_parse_codeml.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Phylo/PAML/_parse_codeml.py 2013-12-05 14:10:43.000000000 +0000
@@ -187,7 +187,7 @@
# "lnL(ntime: 19 np: 22): -2021.348300 +0.000000"
if "lnL(ntime:" in line and len(line_floats) > 0:
results["lnL"] = line_floats[0]
- np_res = re.match("lnL\(ntime:\s+\d+\s+np:\s+(\d+)\)",line)
+ np_res = re.match("lnL\(ntime:\s+\d+\s+np:\s+(\d+)\)", line)
if np_res is not None:
num_params = int(np_res.group(1))
# Get parameter list. This can be useful for specifying starting
@@ -337,7 +337,7 @@
float_model_params = []
for param in model_params:
float_model_params.append((param[0], _nan_float(param[1])))
- parameters = dict(parameters.items() + float_model_params)
+ parameters.update(dict(float_model_params))
if len(parameters) > 0:
results["parameters"] = parameters
return results
diff -Nru python-biopython-1.62/Bio/Phylo/PAML/_parse_yn00.py python-biopython-1.63/Bio/Phylo/PAML/_parse_yn00.py
--- python-biopython-1.62/Bio/Phylo/PAML/_parse_yn00.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Phylo/PAML/_parse_yn00.py 2013-12-05 14:10:43.000000000 +0000
@@ -19,7 +19,7 @@
# Find all floating point numbers in this line
line_floats_res = re.findall("-*\d+\.\d+", line)
line_floats = [float(val) for val in line_floats_res]
- matrix_row_res = re.match("(.+)\s{5,15}",line)
+ matrix_row_res = re.match("(.+)\s{5,15}", line)
if matrix_row_res is not None:
seq_name = matrix_row_res.group(1).strip()
sequences.append(seq_name)
diff -Nru python-biopython-1.62/Bio/Phylo/PAML/baseml.py python-biopython-1.63/Bio/Phylo/PAML/baseml.py
--- python-biopython-1.62/Bio/Phylo/PAML/baseml.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Phylo/PAML/baseml.py 2013-12-05 14:10:43.000000000 +0000
@@ -3,13 +3,10 @@
# license. Please see the LICENSE file that should have been included
# as part of this package.
-# For using with statement in Python 2.5 or Jython
-from __future__ import with_statement
-
import os
import os.path
-from _paml import Paml, _relpath
-import _parse_baseml
+from ._paml import Paml, _relpath
+from . import _parse_baseml
class BasemlError(EnvironmentError):
@@ -104,7 +101,7 @@
with open(ctl_file) as ctl_handle:
for line in ctl_handle:
line = line.strip()
- uncommented = line.split("*",1)[0]
+ uncommented = line.split("*", 1)[0]
if uncommented != "":
if "=" not in uncommented:
raise AttributeError(
@@ -141,8 +138,8 @@
except:
converted_value = value
temp_options[option] = converted_value
- for option in self._options.keys():
- if option in temp_options.keys():
+ for option in self._options:
+ if option in temp_options:
self._options[option] = temp_options[option]
else:
self._options[option] = None
@@ -185,9 +182,8 @@
results = {}
if not os.path.exists(results_file):
raise IOError("Results file does not exist.")
- handle = open(results_file)
- lines = handle.readlines()
- handle.close()
+ with open(results_file) as handle:
+ lines = handle.readlines()
(results, num_params) = _parse_baseml.parse_basics(lines, results)
results = _parse_baseml.parse_parameters(lines, results, num_params)
if results.get("version") is None:
diff -Nru python-biopython-1.62/Bio/Phylo/PAML/codeml.py python-biopython-1.63/Bio/Phylo/PAML/codeml.py
--- python-biopython-1.62/Bio/Phylo/PAML/codeml.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Phylo/PAML/codeml.py 2013-12-05 14:10:43.000000000 +0000
@@ -3,13 +3,12 @@
# license. Please see the LICENSE file that should have been included
# as part of this package.
-# For using with statement in Python 2.5 or Jython
-from __future__ import with_statement
+from __future__ import print_function
import os
import os.path
-from _paml import Paml, _relpath
-import _parse_codeml
+from ._paml import Paml, _relpath
+from . import _parse_codeml
class CodemlError(EnvironmentError):
@@ -89,7 +88,7 @@
# NSsites is stored in Python as a list but in the
# control file it is specified as a series of numbers
# separated by spaces.
- NSsites = " ".join([str(site) for site in option[1]])
+ NSsites = " ".join(str(site) for site in option[1])
ctl_handle.write("%s = %s\n" % (option[0], NSsites))
else:
ctl_handle.write("%s = %s\n" % (option[0], option[1]))
@@ -104,7 +103,7 @@
with open(ctl_file) as ctl_handle:
for line in ctl_handle:
line = line.strip()
- uncommented = line.split("*",1)[0]
+ uncommented = line.split("*", 1)[0]
if uncommented != "":
if "=" not in uncommented:
raise AttributeError(
@@ -141,8 +140,8 @@
except:
converted_value = value
temp_options[option] = converted_value
- for option in self._options.keys():
- if option in temp_options.keys():
+ for option in self._options:
+ if option in temp_options:
self._options[option] = temp_options[option]
else:
self._options[option] = None
@@ -154,10 +153,10 @@
# NSsites is stored in Python as a list but in the
# control file it is specified as a series of numbers
# separated by spaces.
- NSsites = " ".join([str(site) for site in option[1]])
- print "%s = %s" % (option[0], NSsites)
+ NSsites = " ".join(str(site) for site in option[1])
+ print("%s = %s" % (option[0], NSsites))
else:
- print "%s = %s" % (option[0], option[1])
+ print("%s = %s" % (option[0], option[1]))
def _set_rel_paths(self):
"""Convert all file/directory locations to paths relative to the current working directory.
@@ -197,9 +196,8 @@
results = {}
if not os.path.exists(results_file):
raise IOError("Results file does not exist.")
- handle = open(results_file)
- lines = handle.readlines()
- handle.close()
+ with open(results_file) as handle:
+ lines = handle.readlines()
(results, multi_models, multi_genes) = _parse_codeml.parse_basics(lines,
results)
results = _parse_codeml.parse_nssites(lines, results, multi_models,
diff -Nru python-biopython-1.62/Bio/Phylo/PAML/yn00.py python-biopython-1.63/Bio/Phylo/PAML/yn00.py
--- python-biopython-1.62/Bio/Phylo/PAML/yn00.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Phylo/PAML/yn00.py 2013-12-05 14:10:43.000000000 +0000
@@ -4,8 +4,8 @@
# as part of this package.
import os.path
-from _paml import Paml
-import _parse_yn00
+from ._paml import Paml
+from . import _parse_yn00
#TODO - Restore use of with statement for closing handles automatically
#after dropping Python 2.4
@@ -43,8 +43,7 @@
"""
# Make sure all paths are relative to the working directory
self._set_rel_paths()
- if True: # Dummy statement to preserve indentation for diff
- ctl_handle = open(self.ctl_file, 'w')
+ with open(self.ctl_file, 'w') as ctl_handle:
ctl_handle.write("seqfile = %s\n" % self._rel_alignment)
ctl_handle.write("outfile = %s\n" % self._rel_out_file)
for option in self._options.items():
@@ -54,7 +53,6 @@
# commented out.
continue
ctl_handle.write("%s = %s\n" % (option[0], option[1]))
- ctl_handle.close()
def read_ctl_file(self, ctl_file):
"""Parse a control file and load the options into the yn00 instance.
@@ -63,40 +61,37 @@
if not os.path.isfile(ctl_file):
raise IOError("File not found: %r" % ctl_file)
else:
- ctl_handle = open(ctl_file)
- for line in ctl_handle:
- line = line.strip()
- uncommented = line.split("*",1)[0]
- if uncommented != "":
- if "=" not in uncommented:
- ctl_handle.close()
- raise AttributeError(
- "Malformed line in control file:\n%r" % line)
- (option, value) = uncommented.split("=")
- option = option.strip()
- value = value.strip()
- if option == "seqfile":
- self.alignment = value
- elif option == "outfile":
- self.out_file = value
- elif option not in self._options:
- ctl_handle.close()
- raise KeyError("Invalid option: %s" % option)
- else:
- if "." in value or "e-" in value:
- try:
- converted_value = float(value)
- except:
- converted_value = value
+ with open(ctl_file) as ctl_handle:
+ for line in ctl_handle:
+ line = line.strip()
+ uncommented = line.split("*", 1)[0]
+ if uncommented != "":
+ if "=" not in uncommented:
+ raise AttributeError(
+ "Malformed line in control file:\n%r" % line)
+ (option, value) = uncommented.split("=")
+ option = option.strip()
+ value = value.strip()
+ if option == "seqfile":
+ self.alignment = value
+ elif option == "outfile":
+ self.out_file = value
+ elif option not in self._options:
+ raise KeyError("Invalid option: %s" % option)
else:
- try:
- converted_value = int(value)
- except:
- converted_value = value
- temp_options[option] = converted_value
- ctl_handle.close()
- for option in self._options.keys():
- if option in temp_options.keys():
+ if "." in value or "e-" in value:
+ try:
+ converted_value = float(value)
+ except:
+ converted_value = value
+ else:
+ try:
+ converted_value = int(value)
+ except:
+ converted_value = value
+ temp_options[option] = converted_value
+ for option in self._options:
+ if option in temp_options:
self._options[option] = temp_options[option]
else:
self._options[option] = None
@@ -116,9 +111,8 @@
results = {}
if not os.path.exists(results_file):
raise IOError("Results file does not exist.")
- handle = open(results_file)
- lines = handle.readlines()
- handle.close()
+ with open(results_file) as handle:
+ lines = handle.readlines()
for line_num in range(len(lines)):
line = lines[line_num]
if "(A) Nei-Gojobori (1986) method" in line:
diff -Nru python-biopython-1.62/Bio/Phylo/PhyloXML.py python-biopython-1.63/Bio/Phylo/PhyloXML.py
--- python-biopython-1.62/Bio/Phylo/PhyloXML.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Phylo/PhyloXML.py 2013-12-05 14:10:43.000000000 +0000
@@ -17,6 +17,8 @@
import re
import warnings
+from Bio._py3k import basestring
+
from Bio import Alphabet
from Bio.Align import MultipleSeqAlignment
from Bio.Seq import Seq
@@ -223,7 +225,7 @@
return False
seqs = self._filter_search(is_aligned_seq, 'preorder', True)
try:
- first_seq = seqs.next()
+ first_seq = next(seqs)
except StopIteration:
# No aligned sequences were found --> empty MSA
return MultipleSeqAlignment([])
@@ -761,15 +763,16 @@
self.confidence = confidence
def items(self):
- return [(k, v) for k, v in self.__dict__.iteritems() if v is not None]
+ return [(k, v) for k, v in self.__dict__.items() if v is not None]
def keys(self):
- return [k for k, v in self.__dict__.iteritems() if v is not None]
+ return [k for k, v in self.__dict__.items() if v is not None]
def values(self):
- return [v for v in self.__dict__.itervalues() if v is not None]
+ return [v for v in self.__dict__.values() if v is not None]
def __len__(self):
+ #TODO - Better way to do this?
return len(self.values())
def __getitem__(self, key):
@@ -1118,7 +1121,7 @@
"""
def clean_dict(dct):
"""Remove None-valued items from a dictionary."""
- return dict((key, val) for key, val in dct.iteritems()
+ return dict((key, val) for key, val in dct.items()
if val is not None)
seqrec = SeqRecord(Seq(self.mol_seq.value, self.get_alphabet()),
diff -Nru python-biopython-1.62/Bio/Phylo/PhyloXMLIO.py python-biopython-1.63/Bio/Phylo/PhyloXMLIO.py
--- python-biopython-1.62/Bio/Phylo/PhyloXMLIO.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Phylo/PhyloXMLIO.py 2013-12-05 14:10:43.000000000 +0000
@@ -20,6 +20,9 @@
import sys
+from Bio._py3k import basestring
+from Bio._py3k import unicode
+
from Bio.Phylo import PhyloXML as PX
#For speed try to use cElementTree rather than ElementTree
@@ -50,7 +53,7 @@
def register_namespace(prefix, uri):
ElementTree._namespace_map[uri] = prefix
-for prefix, uri in NAMESPACES.iteritems():
+for prefix, uri in NAMESPACES.items():
register_namespace(prefix, uri)
@@ -215,9 +218,9 @@
def _str2bool(text):
- if text == 'true':
+ if text == 'true' or text=='1':
return True
- if text == 'false':
+ if text == 'false' or text=='0':
return False
raise ValueError('String could not be converted to boolean: ' + text)
@@ -284,7 +287,7 @@
def __init__(self, file):
# Get an iterable context for XML parsing events
context = iter(ElementTree.iterparse(file, events=('start', 'end')))
- event, root = context.next()
+ event, root = next(context)
self.root = root
self.context = context
@@ -374,8 +377,8 @@
'reference': 'references',
'property': 'properties',
}
- _clade_tracked_tags = set(_clade_complex_types + _clade_list_types.keys()
- + ['branch_length', 'name', 'node_id', 'width'])
+ _clade_tracked_tags = set(_clade_complex_types).union(_clade_list_types.keys()).union(
+ ['branch_length', 'name', 'node_id', 'width'])
def _parse_clade(self, parent):
"""Parse a Clade node and its children, recursively."""
diff -Nru python-biopython-1.62/Bio/Phylo/_io.py python-biopython-1.63/Bio/Phylo/_io.py
--- python-biopython-1.62/Bio/Phylo/_io.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Phylo/_io.py 2013-12-05 14:10:43.000000000 +0000
@@ -8,8 +8,8 @@
This API follows the same semantics as Biopython's `SeqIO` and `AlignIO`.
"""
-# For with on Python/Jython 2.5
-from __future__ import with_statement
+from __future__ import print_function
+
__docformat__ = "restructuredtext en"
from Bio import File
@@ -46,7 +46,7 @@
>>> trees = parse('../../Tests/PhyloXML/apaf.xml', 'phyloxml')
>>> for tree in trees:
- ... print tree.rooted
+ ... print(tree.rooted)
True
"""
with File.as_handle(file, 'r') as fp:
@@ -62,11 +62,11 @@
"""
try:
tree_gen = parse(file, format, **kwargs)
- tree = tree_gen.next()
+ tree = next(tree_gen)
except StopIteration:
raise ValueError("There are no trees in this file.")
try:
- tree_gen.next()
+ next(tree_gen)
except StopIteration:
return tree
else:
@@ -88,3 +88,4 @@
"""Convert between two tree file formats."""
trees = parse(in_file, in_format, **parse_args)
return write(trees, out_file, out_format, **kwargs)
+
diff -Nru python-biopython-1.62/Bio/Phylo/_utils.py python-biopython-1.63/Bio/Phylo/_utils.py
--- python-biopython-1.62/Bio/Phylo/_utils.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Phylo/_utils.py 2013-12-05 14:10:43.000000000 +0000
@@ -136,7 +136,19 @@
"Install NetworkX if you want to use to_networkx.")
G = to_networkx(tree)
- Gi = networkx.convert_node_labels_to_integers(G, discard_old_labels=False)
+ try:
+ # NetworkX version 1.8 or later (2013-01-20)
+ Gi = networkx.convert_node_labels_to_integers(G,
+ label_attribute='label')
+ int_labels = {}
+ for integer, nodeattrs in Gi.node.items():
+ int_labels[nodeattrs['label']] = integer
+ except TypeError:
+ # Older NetworkX versions (before 1.8)
+ Gi = networkx.convert_node_labels_to_integers(G,
+ discard_old_labels=False)
+ int_labels = Gi.node_labels
+
try:
posi = networkx.graphviz_layout(Gi, prog, args=args)
except ImportError:
@@ -144,6 +156,7 @@
"Install PyGraphviz or pydot if you want to use draw_graphviz.")
def get_label_mapping(G, selection):
+ """Apply the user-specified node relabeling."""
for node in G.nodes():
if (selection is None) or (node in selection):
try:
@@ -157,7 +170,7 @@
labels = dict(get_label_mapping(G, set(kwargs['nodelist'])))
else:
labels = dict(get_label_mapping(G, None))
- kwargs['nodelist'] = labels.keys()
+ kwargs['nodelist'] = list(labels.keys())
if 'edge_color' not in kwargs:
kwargs['edge_color'] = [isinstance(e[2], dict) and
e[2].get('color', 'k') or 'k'
@@ -167,7 +180,7 @@
e[2].get('width', 1.0) or 1.0
for e in G.edges(data=True)]
- posn = dict((n, posi[Gi.node_labels[n]]) for n in G)
+ posn = dict((n, posi[int_labels[n]]) for n in G)
networkx.draw(G, posn, labels=labels, node_color=node_color, **kwargs)
@@ -203,14 +216,14 @@
"""Create a mapping of each clade to its column position."""
depths = tree.depths()
# If there are no branch lengths, assume unit branch lengths
- if not max(depths.itervalues()):
+ if not max(depths.values()):
depths = tree.depths(unit_branch_lengths=True)
# Potential drawing overflow due to rounding -- 1 char per tree layer
fudge_margin = int(math.ceil(math.log(len(taxa), 2)))
cols_per_branch_unit = ((drawing_width - fudge_margin)
- / float(max(depths.itervalues())))
+ / float(max(depths.values())))
return dict((clade, int(round(blen*cols_per_branch_unit + 0.5)))
- for clade, blen in depths.iteritems())
+ for clade, blen in depths.items())
def get_row_positions(tree):
positions = dict((taxon, 2*idx) for idx, taxon in enumerate(taxa))
@@ -357,7 +370,7 @@
"""
depths = tree.depths()
# If there are no branch lengths, assume unit branch lengths
- if not max(depths.itervalues()):
+ if not max(depths.values()):
depths = tree.depths(unit_branch_lengths=True)
return depths
@@ -406,12 +419,12 @@
axes.hlines(y_here, x_start, x_here, color=color, lw=lw)
elif (use_linecollection==True and orientation=='horizontal'):
horizontal_linecollections.append(mpcollections.LineCollection(
- [[(x_start,y_here), (x_here,y_here)]], color=color, lw=lw),)
+ [[(x_start, y_here), (x_here, y_here)]], color=color, lw=lw),)
elif (use_linecollection==False and orientation=='vertical'):
axes.vlines(x_here, y_bot, y_top, color=color)
elif (use_linecollection==True and orientation=='vertical'):
vertical_linecollections.append(mpcollections.LineCollection(
- [[(x_here,y_bot), (x_here,y_top)]], color=color, lw=lw),)
+ [[(x_here, y_bot), (x_here, y_top)]], color=color, lw=lw),)
def draw_clade(clade, x_start, color, lw):
"""Recursively draw a tree, down from the given clade."""
@@ -424,7 +437,7 @@
lw = clade.width * plt.rcParams['lines.linewidth']
# Draw a horizontal line from start to here
draw_clade_lines(use_linecollection=True, orientation='horizontal',
- y_here=y_here, x_start=x_start, x_here=x_here, color='black', lw=lw)
+ y_here=y_here, x_start=x_start, x_here=x_here, color=color, lw=lw)
# Add node/taxon labels
label = label_func(clade)
if label not in (None, clade.__class__.__name__):
@@ -440,7 +453,7 @@
y_bot = y_posns[clade.clades[-1]]
# Only apply widths to horizontal lines, like Archaeopteryx
draw_clade_lines(use_linecollection=True, orientation='vertical',
- x_here=x_here, y_bot=y_bot, y_top=y_top, color='black', lw=lw)
+ x_here=x_here, y_bot=y_bot, y_top=y_top, color=color, lw=lw)
# Draw descendents
for child in clade:
draw_clade(child, x_here, color, lw)
@@ -461,14 +474,14 @@
axes.set_xlabel('branch length')
axes.set_ylabel('taxa')
# Add margins around the tree to prevent overlapping the axes
- xmax = max(x_posns.itervalues())
+ xmax = max(x_posns.values())
axes.set_xlim(-0.05 * xmax, 1.25 * xmax)
# Also invert the y-axis (origin at the top)
# Add a small vertical margin, but avoid including 0 and N+1 on the y axis
- axes.set_ylim(max(y_posns.itervalues()) + 0.8, 0.2)
+ axes.set_ylim(max(y_posns.values()) + 0.8, 0.2)
# Parse and process key word arguments as pyplot options
- for key, value in kwargs.iteritems():
+ for key, value in kwargs.items():
try:
# Check that the pyplot option input is iterable, as required
[i for i in value]
diff -Nru python-biopython-1.62/Bio/PopGen/Async/Local.py python-biopython-1.63/Bio/PopGen/Async/Local.py
--- python-biopython-1.62/Bio/PopGen/Async/Local.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PopGen/Async/Local.py 2013-12-05 14:10:43.000000000 +0000
@@ -11,14 +11,14 @@
from Bio.PopGen.Async import Async
-import thread
+import threading
class Local(Async):
'''Execution on Local machine.
'''
- def __init__(self, num_cores = 1):
+ def __init__(self, num_cores=1):
'''Constructor.
parameters:
@@ -41,7 +41,7 @@
self.waiting.append((id, hook, parameters, input_files))
if self.cores_used < self.num_cores:
self.cores_used += 1
- thread.start_new_thread(self.start_work, ())
+ threading.Thread(target=self.start_work).run()
self.access_ds.release()
def start_work(self):
diff -Nru python-biopython-1.62/Bio/PopGen/Async/__init__.py python-biopython-1.63/Bio/PopGen/Async/__init__.py
--- python-biopython-1.62/Bio/PopGen/Async/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PopGen/Async/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -10,7 +10,7 @@
'''
import os
-import thread
+import threading
class Async(object):
@@ -32,7 +32,7 @@
self.done = {}
self.id = 0
self.hooks = {}
- self.access_ds = thread.allocate_lock()
+ self.access_ds = threading.Lock()
def run_program(self, program, parameters, input_files):
'''Runs a program.
@@ -96,7 +96,7 @@
'''
def __init__(self):
- self.file_list=[]
+ self.file_list = []
def get_File_list(self):
'''Returns the list of available files.
@@ -117,7 +117,7 @@
walk_list = os.walk(directory)
for dir, dir_list, file_list in walk_list:
for file in file_list:
- self.file_list.append(file[len(directory)+1:])
+ self.file_list.append(file[len(directory) + 1:])
def get_file(self, name):
return open(self.directory + os.sep + name)
diff -Nru python-biopython-1.62/Bio/PopGen/FDist/Async.py python-biopython-1.63/Bio/PopGen/FDist/Async.py
--- python-biopython-1.62/Bio/PopGen/FDist/Async.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PopGen/FDist/Async.py 2013-12-05 14:10:43.000000000 +0000
@@ -3,21 +3,19 @@
# license. Please see the LICENSE file that should have been included
# as part of this package.
-
-"""
-This modules allows for asynchronous execution of Fdist and
- spliting of loads.
+"""Asynchronous execution of Fdist and spliting of loads.
FDistAsync Allows for the execution of FDist.
SplitFDist splits a single Fdist execution in several, taking advantage
- of multi-core architectures.
-
+of multi-core architectures.
"""
+from __future__ import print_function
+
import os
import shutil
-import thread
+import threading
from time import sleep
from Bio.PopGen.Async import Local
from Bio.PopGen.FDist.Controller import FDistController
@@ -27,7 +25,7 @@
"""Asynchronous FDist execution.
"""
- def __init__(self, fdist_dir = "", ext = None):
+ def __init__(self, fdist_dir="", ext=None):
"""Constructor.
Parameters:
@@ -57,9 +55,9 @@
beta = parameters.get('beta', (0.25, 0.25))
max_freq = parameters.get('max_freq', 0.99)
fst = self.run_fdist(npops, nsamples, fst, sample_size,
- mut, num_sims, data_dir,
- is_dominant, theta, beta,
- max_freq)
+ mut, num_sims, data_dir,
+ is_dominant, theta, beta,
+ max_freq)
output_files = {}
output_files['out.dat'] = open(data_dir + os.sep + 'out.dat', 'r')
return fst, output_files
@@ -76,8 +74,8 @@
Each SplitFDist object can only be used to run a single FDist
simulation.
"""
- def __init__(self, report_fun = None,
- num_thr = 2, split_size = 1000, fdist_dir = '', ext = None):
+ def __init__(self, report_fun=None,
+ num_thr=2, split_size=1000, fdist_dir='', ext=None):
"""Constructor.
Parameters:
@@ -108,22 +106,20 @@
while(True):
sleep(1)
self.async.access_ds.acquire()
- keys = self.async.done.keys()[:]
+ keys = list(self.async.done.keys()) #copy it
self.async.access_ds.release()
for done in keys:
self.async.access_ds.acquire()
fst, files = self.async.done[done]
del self.async.done[done]
out_dat = files['out.dat']
- f = open(self.data_dir + os.sep + 'out.dat','a')
- f.writelines(out_dat.readlines())
- f.close()
+ with open(self.data_dir + os.sep + 'out.dat', 'a') as f:
+ f.writelines(out_dat.readlines())
out_dat.close()
self.async.access_ds.release()
for file in os.listdir(self.parts[done]):
os.remove(self.parts[done] + os.sep + file)
os.rmdir(self.parts[done])
- #print fst, out_dat
if self.report_fun:
self.report_fun(fst)
self.async.access_ds.acquire()
@@ -131,9 +127,6 @@
and len(self.async.done) == 0:
break
self.async.access_ds.release()
- #print 'R', self.async.running
- #print 'W', self.async.waiting
- #print 'R', self.async.running
def acquire(self):
"""Allows the external acquisition of the lock.
@@ -147,9 +140,9 @@
#You can only run a fdist case at a time
def run_fdist(self, npops, nsamples, fst, sample_size,
- mut = 0, num_sims = 20000, data_dir='.',
- is_dominant = False, theta = 0.06, beta = (0.25, 0.25),
- max_freq = 0.99):
+ mut=0, num_sims=20000, data_dir='.',
+ is_dominant=False, theta=0.06, beta=(0.25, 0.25),
+ max_freq=0.99):
"""Runs FDist.
Parameters can be seen on FDistController.run_fdist.
@@ -157,7 +150,7 @@
It will split a single execution in several parts and
create separated data directories.
"""
- num_parts = num_sims/self.split_size
+ num_parts = num_sims // self.split_size
self.parts = {}
self.data_dir = data_dir
for directory in range(num_parts):
@@ -182,4 +175,4 @@
'max_freq' : max_freq
}, {})
self.parts[id] = full_path
- thread.start_new_thread(self.monitor, ())
+ threading.Thread(target=self.monitor).run()
diff -Nru python-biopython-1.62/Bio/PopGen/FDist/Controller.py python-biopython-1.63/Bio/PopGen/FDist/Controller.py
--- python-biopython-1.62/Bio/PopGen/FDist/Controller.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PopGen/FDist/Controller.py 2013-12-05 14:10:43.000000000 +0000
@@ -19,11 +19,6 @@
from time import strftime, clock
#from logging import debug
-if sys.version_info[0] == 3:
- maxint = sys.maxsize
-else:
- maxint = sys.maxint
-
def my_float(f):
#Because of Jython, mostly
@@ -117,11 +112,10 @@
Parameter:
data_dir - data directory
"""
- inf = open(data_dir + os.sep + 'INTFILE', 'w')
- for i in range(98):
- inf.write(str(randint(-maxint + 1, maxint - 1)) + '\n')
- inf.write('8\n')
- inf.close()
+ with open(data_dir + os.sep + 'INTFILE', 'w') as inf:
+ for i in range(98):
+ inf.write(str(randint(-sys.maxsize + 1, sys.maxsize - 1)) + '\n')
+ inf.write('8\n')
def run_fdist(self, npops, nsamples, fst, sample_size,
mut=0, num_sims=50000, data_dir='.',
@@ -159,20 +153,19 @@
else:
config_name = "fdist_params2.dat"
- f = open(data_dir + os.sep + config_name, 'w')
- f.write(str(npops) + '\n')
- f.write(str(nsamples) + '\n')
- f.write(str(fst) + '\n')
- f.write(str(sample_size) + '\n')
- if is_dominant:
- f.write(str(theta) + '\n')
- else:
- f.write(str(mut) + '\n')
- f.write(str(num_sims) + '\n')
- if is_dominant:
- f.write("%f %f\n" % beta)
- f.write("%f\n" % max_freq)
- f.close()
+ with open(data_dir + os.sep + config_name, 'w') as f:
+ f.write(str(npops) + '\n')
+ f.write(str(nsamples) + '\n')
+ f.write(str(fst) + '\n')
+ f.write(str(sample_size) + '\n')
+ if is_dominant:
+ f.write(str(theta) + '\n')
+ else:
+ f.write(str(mut) + '\n')
+ f.write(str(num_sims) + '\n')
+ if is_dominant:
+ f.write("%f %f\n" % beta)
+ f.write("%f\n" % max_freq)
self._generate_intfile(data_dir)
@@ -256,19 +249,16 @@
"data_fst_outfile out.cpl out.dat",
str(ci), str(smooth)]))
- f = open(data_dir + os.sep + 'out.cpl')
- conf_lines = []
- l = f.readline()
- try:
- while l != '':
- conf_lines.append(
- tuple(map(lambda x: my_float(x),
- l.rstrip().split(' '))))
- l = f.readline()
- except ValueError:
- f.close()
- return []
- f.close()
+ with open(data_dir + os.sep + 'out.cpl') as f:
+ conf_lines = []
+ l = f.readline()
+ try:
+ while l != '':
+ conf_lines.append(
+ tuple(my_float(x) for x in l.rstrip().split(' ')))
+ l = f.readline()
+ except ValueError:
+ return []
return conf_lines
def run_pv(self, out_file='probs.dat', data_dir='.',
@@ -293,9 +283,6 @@
universal_newlines=True)
proc.communicate('data_fst_outfile ' + out_file +
' out.dat\n' + str(smooth) + '\n')
- pvf = open(data_dir + os.sep + out_file, 'r')
- result = map(lambda x: tuple(map(lambda y:
- my_float(y), x.rstrip().split(' '))),
- pvf.readlines())
- pvf.close()
+ with open(data_dir + os.sep + out_file, 'r') as pvf:
+ result = [tuple(my_float(y) for y in x.rstrip().split(' ')) for x in pvf.readlines()]
return result
diff -Nru python-biopython-1.62/Bio/PopGen/FDist/Utils.py python-biopython-1.63/Bio/PopGen/FDist/Utils.py
--- python-biopython-1.62/Bio/PopGen/FDist/Utils.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PopGen/FDist/Utils.py 2013-12-05 14:10:43.000000000 +0000
@@ -4,6 +4,8 @@
# as part of this package.
+from __future__ import print_function
+
from Bio.PopGen.GenePop import FileParser
import Bio.PopGen.FDist
@@ -106,7 +108,7 @@
for al in lParser[1][loci_pos]:
if al is not None:
loci[loci_pos].add(al)
- curr_pop[loci_pos][al]= curr_pop[loci_pos].get(al,0)+1
+ curr_pop[loci_pos][al]= curr_pop[loci_pos].get(al, 0)+1
else:
pops.append(curr_pop)
num_pops += 1
@@ -118,8 +120,7 @@
pops.append(curr_pop)
fd_rec.num_pops = num_pops
for loci_pos in range(num_loci):
- alleles = list(loci[loci_pos])
- alleles.sort()
+ alleles = sorted(loci[loci_pos])
loci_rec = [len(alleles), []]
for pop in pops:
pop_rec = []
diff -Nru python-biopython-1.62/Bio/PopGen/FDist/__init__.py python-biopython-1.63/Bio/PopGen/FDist/__init__.py
--- python-biopython-1.62/Bio/PopGen/FDist/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PopGen/FDist/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -27,17 +27,17 @@
handle is a file-like object that contains a FDist record.
"""
record = Record()
- record.data_org = int(str(handle.next()).rstrip())
- record.num_pops = int(str(handle.next()).rstrip())
- record.num_loci = int(str(handle.next()).rstrip())
+ record.data_org = int(str(next(handle)).rstrip())
+ record.num_pops = int(str(next(handle)).rstrip())
+ record.num_loci = int(str(next(handle)).rstrip())
for i in range(record.num_loci):
- handle.next()
- num_alleles = int(str(handle.next()).rstrip())
+ next(handle)
+ num_alleles = int(str(next(handle)).rstrip())
pops_data = []
if record.data_org==0:
for j in range(record.num_pops):
- line_comp = str(handle.next()).rstrip().split(' ')
- pop_dist = map(lambda x: int(x), line_comp)
+ line_comp = str(next(handle)).rstrip().split(' ')
+ pop_dist = [int(x) for x in line_comp]
pops_data.append(pop_dist)
else:
raise NotImplementedError('1/alleles by rows not implemented')
diff -Nru python-biopython-1.62/Bio/PopGen/GenePop/Controller.py python-biopython-1.63/Bio/PopGen/GenePop/Controller.py
--- python-biopython-1.62/Bio/PopGen/GenePop/Controller.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PopGen/GenePop/Controller.py 2013-12-05 14:10:43.000000000 +0000
@@ -12,6 +12,8 @@
import re
import shutil
+import sys # for checking if under Python 2
+
from Bio.Application import AbstractCommandline, _Argument
@@ -41,16 +43,16 @@
if 'No data' in l:
return None, None
l = f.readline()
- alleles = filter(lambda x: x != '', f.readline().rstrip().split(" "))
- alleles = map(lambda x: _gp_int(x), alleles)
+ alleles = [x for x in f.readline().rstrip().split(" ") if x != '']
+ alleles = [_gp_int(x) for x in alleles]
l = f.readline().rstrip()
table = []
while l != "":
- line = filter(lambda x: x != '', l.split(" "))
+ line = [x for x in l.split(" ") if x != '']
try:
table.append(
(line[0],
- map(lambda x: _gp_float(x), line[1:-1]),
+ [_gp_float(x) for x in line[1:-1]],
_gp_int(line[-1])))
except ValueError:
table.append(
@@ -68,7 +70,7 @@
l = f.readline().rstrip()
l = f.readline().rstrip()
while '===' not in l and '---' not in l and l != "":
- toks = filter(lambda x: x != "", l.split(" "))
+ toks = [x for x in l.split(" ") if x != ""]
line = []
for i in range(len(toks)):
try:
@@ -85,8 +87,7 @@
l = f.readline().rstrip()
while l != "":
matrix.append(
- map(lambda x: _gp_float(x),
- filter(lambda y: y != "", l.split(" "))))
+ [_gp_float(x) for x in [y for y in l.split(" ") if y != ""]])
l = f.readline().rstrip()
return matrix
@@ -96,10 +97,10 @@
header = f.readline().rstrip()
if '---' in header or '===' in header:
header = f.readline().rstrip()
- nlines = len(filter(lambda x:x != '', header.split(' '))) - 1
+ nlines = len([x for x in header.split(' ') if x != '']) - 1
for line_pop in range(nlines):
l = f.readline().rstrip()
- vals = filter(lambda x:x != '', l.split(' ')[1:])
+ vals = [x for x in l.split(' ')[1:] if x != '']
clean_vals = []
for val in vals:
try:
@@ -122,7 +123,7 @@
stream.readline()
stream.readline()
stream.readline()
- table = _read_table(stream,[str,_gp_float,_gp_float,_gp_float,_gp_float,_gp_int,str])
+ table = _read_table(stream, [str, _gp_float, _gp_float, _gp_float, _gp_float, _gp_int, str])
#loci might mean pop if hook="Locus "
loci = {}
for entry in table:
@@ -145,9 +146,14 @@
The generator function is expected to yield a tuple, while
consuming input
"""
- def __init__(self, func, stream, fname):
+ def __init__(self, func, fname, handle=None):
self.func = func
- self.stream = stream
+ if handle is None:
+ self.stream = open(fname)
+ else:
+ # For special cases where calling code wants to
+ # seek into the file before starting:
+ self.stream = handle
self.fname = fname
self.done = False
@@ -157,9 +163,19 @@
raise StopIteration
return self
- def next(self):
+ def __next__(self):
return self.func(self)
+ if sys.version_info[0] < 3:
+ def next(self):
+ """Deprecated Python 2 style alias for Python 3 style __next__ method."""
+ import warnings
+ from Bio import BiopythonDeprecationWarning
+ warnings.warn("Please use next(my_iterator) instead of my_iterator.next(), "
+ "the .next() method is deprecated and will be removed in a "
+ "future release of Biopython.", BiopythonDeprecationWarning)
+ return self.__next__()
+
def __del__(self):
self.stream.close()
try:
@@ -206,8 +222,8 @@
Example set_menu([6,1]) = get all F statistics (menu 6.1)
"""
- self.set_parameter("command", "MenuOptions="+
- ".".join(map(lambda x:str(x),option_list)))
+ self.set_parameter("command", "MenuOptions=" +
+ ".".join(str(x) for x in option_list))
def set_input(self, fname):
"""Sets the input file name.
@@ -279,12 +295,11 @@
"""
opts = self._get_opts(dememorization, batches, iterations, enum_test)
self._run_genepop([ext], [1, type], fname, opts)
- f = open(fname + ext)
def hw_func(self):
return _hw_func(self.stream, False)
- return _FileIterator(hw_func, f, fname + ext)
+ return _FileIterator(hw_func, fname + ext)
def _test_global_hz_both(self, fname, type, ext, enum_test = True,
dememorization = 10000, batches = 20,
@@ -309,28 +324,26 @@
def hw_pop_func(self):
return _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float])
- f1 = open(fname + ext)
- l = f1.readline()
- while "by population" not in l:
+ with open(fname + ext) as f1:
l = f1.readline()
- pop_p = _read_table(f1, [str, _gp_float, _gp_float, _gp_float])
- f2 = open(fname + ext)
- l = f2.readline()
- while "by locus" not in l:
+ while "by population" not in l:
+ l = f1.readline()
+ pop_p = _read_table(f1, [str, _gp_float, _gp_float, _gp_float])
+ with open(fname + ext) as f2:
l = f2.readline()
- loc_p = _read_table(f2, [str, _gp_float, _gp_float, _gp_float])
- f = open(fname + ext)
- l = f.readline()
- while "all locus" not in l:
+ while "by locus" not in l:
+ l = f2.readline()
+ loc_p = _read_table(f2, [str, _gp_float, _gp_float, _gp_float])
+ with open(fname + ext) as f:
l = f.readline()
- f.readline()
- f.readline()
- f.readline()
- f.readline()
- l = f.readline().rstrip()
- p, se, switches = tuple(map(lambda x: _gp_float(x),
- filter(lambda y: y != "",l.split(" "))))
- f.close()
+ while "all locus" not in l:
+ l = f.readline()
+ f.readline()
+ f.readline()
+ f.readline()
+ f.readline()
+ l = f.readline().rstrip()
+ p, se, switches = tuple(_gp_float(x) for x in [y for y in l.split(" ") if y != ""])
return pop_p, loc_p, (p, se, switches)
#1.1
@@ -391,9 +404,8 @@
return _hw_func(self.stream, False, True)
shutil.copyfile(fname+".P", fname+".P2")
- f1 = open(fname + ".P")
- f2 = open(fname + ".P2")
- return _FileIterator(hw_prob_loci_func, f1, fname + ".P"), _FileIterator(hw_prob_pop_func, f2, fname + ".P2")
+
+ return _FileIterator(hw_prob_loci_func, fname + ".P"), _FileIterator(hw_prob_pop_func, fname + ".P2")
#1.4
def test_global_hz_deficiency(self, fname, enum_test = True,
@@ -447,7 +459,7 @@
if l == "":
self.done = True
raise StopIteration
- toks = filter(lambda x: x != "", l.split(" "))
+ toks = [x for x in l.split(" ") if x != ""]
pop, locus1, locus2 = toks[0], toks[1], toks[2]
if not hasattr(self, "start_locus1"):
start_locus1, start_locus2 = locus1, locus2
@@ -464,7 +476,7 @@
if l == "":
self.done = True
raise StopIteration
- toks = filter(lambda x: x != "", l.split(" "))
+ toks = [x for x in l.split(" ") if x != ""]
locus1, locus2 = toks[0], toks[2]
try:
chi2, df, p = _gp_float(toks[3]), _gp_int(toks[4]), _gp_float(toks[5])
@@ -483,7 +495,7 @@
l = f2.readline()
while "----" not in l:
l = f2.readline()
- return _FileIterator(ld_pop_func, f1, fname+".DIS"), _FileIterator(ld_func, f2, fname + ".DI2")
+ return _FileIterator(ld_pop_func, fname+".DIS", f1), _FileIterator(ld_func, fname + ".DI2", f2)
#2.2
def create_contingency_tables(self, fname):
@@ -512,9 +524,8 @@
#4
def estimate_nm(self, fname):
self._run_genepop(["PRI"], [4], fname)
- f = open(fname + ".PRI")
- lines = f.readlines() # Small file, it is ok
- f.close()
+ with open(fname + ".PRI") as f:
+ lines = f.readlines() # Small file, it is ok
for line in lines:
m = re.search("Mean sample size: ([.0-9]+)", line)
if m is not None:
@@ -568,21 +579,20 @@
Will create a file called fname.INF
"""
- self._run_genepop(["INF"], [5,1], fname)
+ self._run_genepop(["INF"], [5, 1], fname)
#First pass, general information
#num_loci = None
#num_pops = None
- #f = open(fname + ".INF")
- #l = f.readline()
- #while (num_loci is None or num_pops is None) and l != '':
- # m = re.search("Number of populations detected : ([0-9+])", l)
- # if m is not None:
- # num_pops = _gp_int(m.group(1))
- # m = re.search("Number of loci detected : ([0-9+])", l)
- # if m is not None:
- # num_loci = _gp_int(m.group(1))
- # l = f.readline()
- #f.close()
+ #with open(fname + ".INF") as f:
+ #l = f.readline()
+ #while (num_loci is None or num_pops is None) and l != '':
+ #m = re.search("Number of populations detected : ([0-9+])", l)
+ #if m is not None:
+ #num_pops = _gp_int(m.group(1))
+ #m = re.search("Number of loci detected : ([0-9+])", l)
+ #if m is not None:
+ #num_loci = _gp_int(m.group(1))
+ #l = f.readline()
def pop_parser(self):
if hasattr(self, "old_line"):
@@ -622,7 +632,7 @@
l = self.stream.readline()
while l != "\n":
- m2 = re.match(" +([0-9]+) , ([0-9]+) *([0-9]+) *(.+)",l)
+ m2 = re.match(" +([0-9]+) , ([0-9]+) *([0-9]+) *(.+)", l)
if m2 is not None:
geno_list.append((_gp_int(m2.group(1)), _gp_int(m2.group(2)),
_gp_int(m2.group(3)), _gp_float(m2.group(4))))
@@ -648,8 +658,7 @@
freq_fis={}
overall_fis = None
while "----" not in l:
- vals = filter(lambda x: x!='',
- l.rstrip().split(' '))
+ vals = [x for x in l.rstrip().split(' ') if x!='']
if vals[0]=="Tot":
overall_fis = _gp_int(vals[1]), \
_gp_float(vals[2]), _gp_float(vals[3])
@@ -676,24 +685,21 @@
self.done = True
raise StopIteration
- popf = open(fname + ".INF")
shutil.copyfile(fname + ".INF", fname + ".IN2")
- locf = open(fname + ".IN2")
- pop_iter = _FileIterator(pop_parser, popf, fname + ".INF")
- locus_iter = _FileIterator(locus_parser, locf, fname + ".IN2")
+ pop_iter = _FileIterator(pop_parser, fname + ".INF")
+ locus_iter = _FileIterator(locus_parser, fname + ".IN2")
return (pop_iter, locus_iter)
def _calc_diversities_fis(self, fname, ext):
- self._run_genepop([ext], [5,2], fname)
- f = open(fname + ext)
- l = f.readline()
- while l != "":
- l = l.rstrip()
- if l.startswith("Statistics per sample over all loci with at least two individuals typed"):
- avg_fis = _read_table(f, [str, _gp_float, _gp_float, _gp_float])
- avg_Qintra = _read_table(f, [str, _gp_float])
+ self._run_genepop([ext], [5, 2], fname)
+ with open(fname + ext) as f:
l = f.readline()
- f.close()
+ while l != "":
+ l = l.rstrip()
+ if l.startswith("Statistics per sample over all loci with at least two individuals typed"):
+ avg_fis = _read_table(f, [str, _gp_float, _gp_float, _gp_float])
+ avg_Qintra = _read_table(f, [str, _gp_float])
+ l = f.readline()
def fis_func(self):
l = self.stream.readline()
@@ -708,15 +714,14 @@
self.stream.readline()
fis_table = _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float])
self.stream.readline()
- avg_qinter, avg_fis = tuple(map(lambda x: _gp_float(x),
- filter(lambda y:y != "", self.stream.readline().split(" "))))
+ avg_qinter, avg_fis = tuple(_gp_float(x) for x in
+ [y for y in self.stream.readline().split(" ") if y != ""])
return locus, fis_table, avg_qinter, avg_fis
l = self.stream.readline()
self.done = True
raise StopIteration
- dvf = open(fname + ext)
- return _FileIterator(fis_func, dvf, fname + ext), avg_fis, avg_Qintra
+ return _FileIterator(fis_func, fname + ext), avg_fis, avg_Qintra
#5.2
def calc_diversities_fis_with_identity(self, fname):
@@ -742,27 +747,25 @@
This does not return the genotype frequencies.
"""
- self._run_genepop([".FST"], [6,1], fname)
- f = open(fname + ".FST")
- l = f.readline()
- while l != '':
- if l.startswith(' All:'):
- toks=filter(lambda x:x!="", l.rstrip().split(' '))
- try:
- allFis = _gp_float(toks[1])
- except ValueError:
- allFis = None
- try:
- allFst = _gp_float(toks[2])
- except ValueError:
- allFst = None
- try:
- allFit = _gp_float(toks[3])
- except ValueError:
- allFit = None
+ self._run_genepop([".FST"], [6, 1], fname)
+ with open(fname + ".FST") as f:
l = f.readline()
- f.close()
- f = open(fname + ".FST")
+ while l != '':
+ if l.startswith(' All:'):
+ toks = [x for x in l.rstrip().split(' ') if x != ""]
+ try:
+ allFis = _gp_float(toks[1])
+ except ValueError:
+ allFis = None
+ try:
+ allFst = _gp_float(toks[2])
+ except ValueError:
+ allFst = None
+ try:
+ allFit = _gp_float(toks[3])
+ except ValueError:
+ allFit = None
+ l = f.readline()
def proc(self):
if hasattr(self, "last_line"):
@@ -801,19 +804,18 @@
self.stream.close()
self.done = True
raise StopIteration
- return (allFis, allFst, allFit), _FileIterator(proc , f, fname + ".FST")
+ return (allFis, allFst, allFit), _FileIterator(proc, fname + ".FST")
#6.2
def calc_fst_pair(self, fname):
- self._run_genepop([".ST2", ".MIG"], [6,2], fname)
- f = open(fname + ".ST2")
- l = f.readline()
- while l != "":
- l = l.rstrip()
- if l.startswith("Estimates for all loci"):
- avg_fst = _read_headed_triangle_matrix(f)
+ self._run_genepop([".ST2", ".MIG"], [6, 2], fname)
+ with open(fname + ".ST2") as f:
l = f.readline()
- f.close()
+ while l != "":
+ l = l.rstrip()
+ if l.startswith("Estimates for all loci"):
+ avg_fst = _read_headed_triangle_matrix(f)
+ l = f.readline()
def loci_func(self):
l = self.stream.readline()
@@ -828,9 +830,8 @@
self.done = True
raise StopIteration
- stf = open(fname + ".ST2")
os.remove(fname + ".MIG")
- return _FileIterator(loci_func, stf, fname + ".ST2"), avg_fst
+ return _FileIterator(loci_func, fname + ".ST2"), avg_fst
#6.3
def calc_rho_all(self, fname):
@@ -843,33 +844,32 @@
def _calc_ibd(self, fname, sub, stat="a", scale="Log", min_dist=0.00001):
"""Calculates isolation by distance statistics
"""
- self._run_genepop([".GRA", ".MIG", ".ISO"], [6,sub],
+ self._run_genepop([".GRA", ".MIG", ".ISO"], [6, sub],
fname, opts = {
- "MinimalDistance" : min_dist,
- "GeographicScale" : scale,
- "IsolBDstatistic" : stat,
+ "MinimalDistance": min_dist,
+ "GeographicScale": scale,
+ "IsolBDstatistic": stat,
})
- f = open(fname + ".ISO")
- f.readline()
- f.readline()
- f.readline()
- f.readline()
- estimate = _read_triangle_matrix(f)
- f.readline()
- f.readline()
- distance = _read_triangle_matrix(f)
- f.readline()
- match = re.match("a = (.+), b = (.+)", f.readline().rstrip())
- a = _gp_float(match.group(1))
- b = _gp_float(match.group(2))
- f.readline()
- f.readline()
- match = re.match(" b=(.+)", f.readline().rstrip())
- bb = _gp_float(match.group(1))
- match = re.match(".*\[(.+) ; (.+)\]", f.readline().rstrip())
- bblow = _gp_float(match.group(1))
- bbhigh = _gp_float(match.group(2))
- f.close()
+ with open(fname + ".ISO") as f:
+ f.readline()
+ f.readline()
+ f.readline()
+ f.readline()
+ estimate = _read_triangle_matrix(f)
+ f.readline()
+ f.readline()
+ distance = _read_triangle_matrix(f)
+ f.readline()
+ match = re.match("a = (.+), b = (.+)", f.readline().rstrip())
+ a = _gp_float(match.group(1))
+ b = _gp_float(match.group(2))
+ f.readline()
+ f.readline()
+ match = re.match(" b=(.+)", f.readline().rstrip())
+ bb = _gp_float(match.group(1))
+ match = re.match(".*\[(.+) ; (.+)\]", f.readline().rstrip())
+ bblow = _gp_float(match.group(1))
+ bbhigh = _gp_float(match.group(2))
os.remove(fname + ".MIG")
os.remove(fname + ".GRA")
os.remove(fname + ".ISO")
diff -Nru python-biopython-1.62/Bio/PopGen/GenePop/EasyController.py python-biopython-1.63/Bio/PopGen/GenePop/EasyController.py
--- python-biopython-1.62/Bio/PopGen/GenePop/EasyController.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PopGen/GenePop/EasyController.py 2013-12-05 14:10:43.000000000 +0000
@@ -10,7 +10,7 @@
"""
-from Controller import GenePopController
+from .Controller import GenePopController
from Bio.PopGen import GenePop
@@ -28,9 +28,8 @@
self.__allele_frequency = {} # More caches like this needed!
def get_basic_info(self):
- f=open(self._fname)
- rec = GenePop.read(f)
- f.close()
+ with open(self._fname) as f:
+ rec = GenePop.read(f)
return rec.pop_list, rec.loci_list
def test_hw_pop(self, pop_pos, test_type = "probability"):
@@ -41,8 +40,8 @@
else:
loci_res, hw_res, fisher_full = self._controller.test_pop_hz_prob(self._fname, ".P")
for i in range(pop_pos-1):
- hw_res.next()
- return hw_res.next()
+ next(hw_res)
+ return next(hw_res)
def test_hw_global(self, test_type = "deficiency", enum_test = True,
dememorization = 10000, batches = 20, iterations = 5000):
@@ -111,7 +110,7 @@
geno_freqs = self._controller.calc_allele_genotype_freqs(self._fname)
pop_iter, loc_iter = geno_freqs
pop_iter = list(pop_iter)
- return pop_iter[pop_pos][1][locus_name][2].keys()
+ return list(pop_iter[pop_pos][1][locus_name][2].keys())
def get_alleles_all_pops(self, locus_name):
"""Returns the alleles for a certain population and locus.
diff -Nru python-biopython-1.62/Bio/PopGen/GenePop/FileParser.py python-biopython-1.63/Bio/PopGen/GenePop/FileParser.py
--- python-biopython-1.62/Bio/PopGen/GenePop/FileParser.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PopGen/GenePop/FileParser.py 2013-12-05 14:10:43.000000000 +0000
@@ -68,6 +68,12 @@
self.fname = fname
self.start_read()
+ def __del__(self):
+ try:
+ self._handle.close()
+ except AttributeError:
+ pass
+
def __str__(self):
"""Returns (reconstructs) a GenePop textual representation.
@@ -191,28 +197,67 @@
fname - file to be created with population removed
"""
old_rec = read(self.fname)
- f = open(fname, "w")
- f.write(self.comment_line + "\n")
- for locus in old_rec.loci_list:
- f.write(locus + "\n")
- curr_pop = 0
- l_parser = old_rec.get_individual()
- start_pop = True
- while l_parser:
- if curr_pop == pos:
- old_rec.skip_population()
- curr_pop += 1
- else:
- if l_parser is True:
+ with open(fname, "w") as f:
+ f.write(self.comment_line + "\n")
+ for locus in old_rec.loci_list:
+ f.write(locus + "\n")
+ curr_pop = 0
+ l_parser = old_rec.get_individual()
+ start_pop = True
+ while l_parser:
+ if curr_pop == pos:
+ old_rec.skip_population()
curr_pop += 1
- start_pop = True
else:
- if start_pop:
- f.write("POP\n")
- start_pop = False
+ if l_parser is True:
+ curr_pop += 1
+ start_pop = True
+ else:
+ if start_pop:
+ f.write("POP\n")
+ start_pop = False
+ name, markers = l_parser
+ f.write(name + ",")
+ for marker in markers:
+ f.write(' ')
+ for al in marker:
+ if al is None:
+ al = '0'
+ aStr = str(al)
+ while len(aStr)<3:
+ aStr = "".join(['0', aStr])
+ f.write(aStr)
+ f.write('\n')
+
+ l_parser = old_rec.get_individual()
+
+ def remove_locus_by_position(self, pos, fname):
+ """Removes a locus by position.
+
+ pos - position
+ fname - file to be created with locus removed
+ """
+ old_rec = read(self.fname)
+ with open(fname, "w") as f:
+ f.write(self.comment_line + "\n")
+ loci_list = old_rec.loci_list
+ del loci_list[pos]
+ for locus in loci_list:
+ f.write(locus + "\n")
+ l_parser = old_rec.get_individual()
+ f.write("POP\n")
+ while l_parser:
+ if l_parser is True:
+ f.write("POP\n")
+ else:
name, markers = l_parser
f.write(name + ",")
+ marker_pos = 0
for marker in markers:
+ if marker_pos == pos:
+ marker_pos += 1
+ continue
+ marker_pos += 1
f.write(' ')
for al in marker:
if al is None:
@@ -223,48 +268,7 @@
f.write(aStr)
f.write('\n')
- l_parser = old_rec.get_individual()
- f.close()
-
- def remove_locus_by_position(self, pos, fname):
- """Removes a locus by position.
-
- pos - position
- fname - file to be created with locus removed
- """
- old_rec = read(self.fname)
- f = open(fname, "w")
- f.write(self.comment_line + "\n")
- loci_list = old_rec.loci_list
- del loci_list[pos]
- for locus in loci_list:
- f.write(locus + "\n")
- l_parser = old_rec.get_individual()
- f.write("POP\n")
- while l_parser:
- if l_parser is True:
- f.write("POP\n")
- else:
- name, markers = l_parser
- f.write(name + ",")
- marker_pos = 0
- for marker in markers:
- if marker_pos == pos:
- marker_pos += 1
- continue
- marker_pos += 1
- f.write(' ')
- for al in marker:
- if al is None:
- al = '0'
- aStr = str(al)
- while len(aStr)<3:
- aStr = "".join(['0', aStr])
- f.write(aStr)
- f.write('\n')
-
- l_parser = old_rec.get_individual()
- f.close()
+ l_parser = old_rec.get_individual()
def remove_loci_by_position(self, positions, fname):
"""Removes a set of loci by position.
@@ -273,43 +277,42 @@
fname - file to be created with locus removed
"""
old_rec = read(self.fname)
- f = open(fname, "w")
- f.write(self.comment_line + "\n")
- loci_list = old_rec.loci_list
- positions.sort()
- positions.reverse()
- posSet = set()
- for pos in positions:
- del loci_list[pos]
- posSet.add(pos)
- for locus in loci_list:
- f.write(locus + "\n")
- l_parser = old_rec.get_individual()
- f.write("POP\n")
- while l_parser:
- if l_parser is True:
- f.write("POP\n")
- else:
- name, markers = l_parser
- f.write(name + ",")
- marker_pos = 0
- for marker in markers:
- if marker_pos in posSet:
+ with open(fname, "w") as f:
+ f.write(self.comment_line + "\n")
+ loci_list = old_rec.loci_list
+ positions.sort()
+ positions.reverse()
+ posSet = set()
+ for pos in positions:
+ del loci_list[pos]
+ posSet.add(pos)
+ for locus in loci_list:
+ f.write(locus + "\n")
+ l_parser = old_rec.get_individual()
+ f.write("POP\n")
+ while l_parser:
+ if l_parser is True:
+ f.write("POP\n")
+ else:
+ name, markers = l_parser
+ f.write(name + ",")
+ marker_pos = 0
+ for marker in markers:
+ if marker_pos in posSet:
+ marker_pos += 1
+ continue
marker_pos += 1
- continue
- marker_pos += 1
- f.write(' ')
- for al in marker:
- if al is None:
- al = '0'
- aStr = str(al)
- while len(aStr)<3:
- aStr = "".join(['0', aStr])
- f.write(aStr)
- f.write('\n')
+ f.write(' ')
+ for al in marker:
+ if al is None:
+ al = '0'
+ aStr = str(al)
+ while len(aStr)<3:
+ aStr = "".join(['0', aStr])
+ f.write(aStr)
+ f.write('\n')
- l_parser = old_rec.get_individual()
- f.close()
+ l_parser = old_rec.get_individual()
def remove_locus_by_name(self, name, fname):
"""Removes a locus by name.
diff -Nru python-biopython-1.62/Bio/PopGen/GenePop/__init__.py python-biopython-1.63/Bio/PopGen/GenePop/__init__.py
--- python-biopython-1.62/Bio/PopGen/GenePop/__init__.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PopGen/GenePop/__init__.py 2013-12-05 14:10:43.000000000 +0000
@@ -51,11 +51,11 @@
handle is a file-like object that contains a GenePop record.
"""
record = Record()
- record.comment_line = str(handle.next()).rstrip()
+ record.comment_line = str(next(handle)).rstrip()
#We can now have one loci per line or all loci in a single line
#separated by either space or comma+space...
#We will remove all commas on loci... that should not be a problem
- sample_loci_line = str(handle.next()).rstrip().replace(',', '')
+ sample_loci_line = str(next(handle)).rstrip().replace(',', '')
all_loci = sample_loci_line.split(' ')
record.loci_list.extend(all_loci)
for line in handle:
diff -Nru python-biopython-1.62/Bio/PopGen/SimCoal/Async.py python-biopython-1.63/Bio/PopGen/SimCoal/Async.py
--- python-biopython-1.62/Bio/PopGen/SimCoal/Async.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PopGen/SimCoal/Async.py 2013-12-05 14:10:43.000000000 +0000
@@ -10,7 +10,7 @@
import os
-import Cache
+from . import Cache
class SimCoalCache(Cache.SimCoalCache):
@@ -25,8 +25,7 @@
f = inputFiles[parFile]
text = f.read()
f.close()
- w = open(os.sep.join([self.data_dir, 'SimCoal', 'runs', parFile]), 'w')
- w.write(text)
- w.close()
+ with open(os.sep.join([self.data_dir, 'SimCoal', 'runs', parFile]), 'w') as w:
+ w.write(text)
self.run_simcoal(parFile, numSims, ploydi)
return 0, None
diff -Nru python-biopython-1.62/Bio/PopGen/SimCoal/Cache.py python-biopython-1.63/Bio/PopGen/SimCoal/Cache.py
--- python-biopython-1.62/Bio/PopGen/SimCoal/Cache.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PopGen/SimCoal/Cache.py 2013-12-05 14:10:43.000000000 +0000
@@ -8,7 +8,7 @@
import os
import tarfile
-from Controller import SimCoalController
+from .Controller import SimCoalController
class SimCoalCache(object):
diff -Nru python-biopython-1.62/Bio/PopGen/SimCoal/Template.py python-biopython-1.63/Bio/PopGen/SimCoal/Template.py
--- python-biopython-1.62/Bio/PopGen/SimCoal/Template.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/PopGen/SimCoal/Template.py 2013-12-05 14:10:43.000000000 +0000
@@ -3,8 +3,11 @@
# license. Please see the LICENSE file that should have been included
# as part of this package.
+from __future__ import print_function
+
from os import sep
import re
+from functools import reduce
from Bio.PopGen.SimCoal import builtin_tpl_dir
@@ -34,12 +37,11 @@
#reg = re.compile('\?' + name, re.MULTILINE)
#template = re.sub(reg, str(val), template)
template = template.replace('?'+name, str(val))
- f = open(f_name + '.par', 'w')
- #executed_template = template
- executed_template = exec_template(template)
- clean_template = executed_template.replace('\r\n','\n').replace('\n\n','\n')
- f.write(clean_template)
- f.close()
+ with open(f_name + '.par', 'w') as f:
+ #executed_template = template
+ executed_template = exec_template(template)
+ clean_template = executed_template.replace('\r\n', '\n').replace('\n\n', '\n')
+ f.write(clean_template)
return [f_name]
else:
name, rng = para_list[0]
@@ -156,15 +158,15 @@
'''
if tp_dir is None:
#Internal Template
- f = open(sep.join([builtin_tpl_dir, model + '.par']), 'r')
+ filename = sep.join([builtin_tpl_dir, model + '.par'])
else:
#External template
- f = open(sep.join([tp_dir, model + '.par']), 'r')
- l = f.readline()
- while l!='':
- stream.write(l)
+ filename = sep.join([tp_dir, model + '.par'])
+ with open(filename, 'r') as f:
l = f.readline()
- f.close()
+ while l!='':
+ stream.write(l)
+ l = f.readline()
def _gen_loci(stream, loci):
@@ -173,8 +175,7 @@
stream.write('//Per Block: Data type, No. of loci, Recombination rate to the right-side locus, plus optional parameters\n')
for locus in loci:
stream.write(' '.join([locus[0]] +
- map(lambda x: str(x), list(locus[1])
- )) + '\n')
+ [str(x) for x in list(locus[1])]) + '\n')
def get_chr_template(stream, chrs):
@@ -217,13 +218,10 @@
get_demography_template, chrs from get_chr_template and
params from generate_model).
'''
- stream = open(out_dir + sep + 'tmp.par', 'w')
- get_demography_template(stream, model, tp_dir)
- get_chr_template(stream, chrs)
- stream.close()
- #par_stream = open(out_dir + sep + 'tmp.par', 'r')
- #print par_stream.read()
- #par_stream.close()
- par_stream = open(out_dir + sep + 'tmp.par', 'r')
- generate_model(par_stream, model, params, out_dir = out_dir)
- par_stream.close()
+ with open(out_dir + sep + 'tmp.par', 'w') as stream:
+ get_demography_template(stream, model, tp_dir)
+ get_chr_template(stream, chrs)
+ #with open(out_dir + sep + 'tmp.par', 'r') as par_stream:
+ #print par_stream.read()
+ with open(out_dir + sep + 'tmp.par', 'r') as par_stream:
+ generate_model(par_stream, model, params, out_dir = out_dir)
diff -Nru python-biopython-1.62/Bio/Restriction/PrintFormat.py python-biopython-1.63/Bio/Restriction/PrintFormat.py
--- python-biopython-1.62/Bio/Restriction/PrintFormat.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Restriction/PrintFormat.py 2013-12-05 14:10:43.000000000 +0000
@@ -8,7 +8,12 @@
# as part of this package.
#
+from __future__ import print_function
+
import re
+
+from Bio._py3k import range
+
from Bio.Restriction import RanaConfig as RanaConf
"""
@@ -29,7 +34,7 @@
>>> handle.close()
>>> dct = AllEnzymes.search(pBR322.seq)
>>> new = PrintFormat()
- >>> new.print_that(dct, '\n my pBR322 analysis\n\n','\n no site :\n\n')
+ >>> new.print_that(dct, '\n my pBR322 analysis\n\n', '\n no site :\n\n')
my pBR322 analysis
@@ -110,12 +115,12 @@
if not dct:
dct = self.results
ls, nc = [], []
- for k, v in dct.iteritems():
+ for k, v in dct.items():
if v:
- ls.append((k,v))
+ ls.append((k, v))
else:
nc.append(k)
- print self.make_format(ls, title, nc, s1)
+ print(self.make_format(ls, title, nc, s1))
return
def make_format(self, cut=[], title='', nc=[], s1=''):
@@ -124,11 +129,11 @@
Virtual method.
Here to be pointed to one of the _make_* methods.
You can as well create a new method and point make_format to it."""
- return self._make_list(cut,title, nc,s1)
+ return self._make_list(cut, title, nc, s1)
###### _make_* methods to be used with the virtual method make_format
- def _make_list(self, ls,title, nc,s1):
+ def _make_list(self, ls, title, nc, s1):
"""PF._make_number(ls,title, nc,s1) -> string.
return a string of form:
@@ -144,7 +149,7 @@
s1 is the sentence before the non cutting enzymes."""
return self._make_list_only(ls, title) + self._make_nocut_only(nc, s1)
- def _make_map(self, ls,title, nc,s1):
+ def _make_map(self, ls, title, nc, s1):
"""PF._make_number(ls,title, nc,s1) -> string.
return a string of form:
@@ -163,7 +168,7 @@
s1 is the sentence before the non cutting enzymes."""
return self._make_map_only(ls, title) + self._make_nocut_only(nc, s1)
- def _make_number(self, ls,title, nc,s1):
+ def _make_number(self, ls, title, nc, s1):
"""PF._make_number(ls,title, nc,s1) -> string.
title.
@@ -181,9 +186,9 @@
title is the title.
nc is a list of non cutting enzymes.
s1 is the sentence before the non cutting enzymes."""
- return self._make_number_only(ls, title)+self._make_nocut_only(nc,s1)
+ return self._make_number_only(ls, title)+self._make_nocut_only(nc, s1)
- def _make_nocut(self, ls,title, nc,s1):
+ def _make_nocut(self, ls, title, nc, s1):
"""PF._make_nocut(ls,title, nc,s1) -> string.
return a formatted string of the non cutting enzymes.
@@ -257,7 +262,7 @@
Non cutting enzymes are not included."""
if not ls:
return title
- ls.sort(lambda x,y : cmp(len(x[1]), len(y[1])))
+ ls.sort(lambda x, y : cmp(len(x[1]), len(y[1])))
iterator = iter(ls)
cur_len = 1
new_sect = []
@@ -268,7 +273,7 @@
title = self.__next_section(new_sect, title)
new_sect, cur_len = [(name, sites)], l
continue
- new_sect.append((name,sites))
+ new_sect.append((name, sites))
title += "\n\nenzymes which cut %i times :\n\n"%cur_len
return self.__next_section(new_sect, title)
@@ -291,8 +296,7 @@
"""
if not ls:
return title
- resultKeys = [str(x) for x,y in ls]
- resultKeys.sort()
+ resultKeys = sorted(str(x) for x, y in ls)
map = title or ''
enzymemap = {}
for (enzyme, cut) in ls:
@@ -301,11 +305,10 @@
enzymemap[c].append(str(enzyme))
else:
enzymemap[c] = [str(enzyme)]
- mapping = enzymemap.keys()
- mapping.sort()
+ mapping = sorted(enzymemap.keys())
cutloc = {}
x, counter, length = 0, 0, len(self.sequence)
- for x in xrange(60, length, 60):
+ for x in range(60, length, 60):
counter = x - 60
l=[]
for key in mapping:
@@ -323,14 +326,14 @@
base, counter = 0, 0
emptyline = ' ' * 60
Join = ''.join
- for base in xrange(60, length, 60):
+ for base in range(60, length, 60):
counter = base - 60
line = emptyline
for key in cutloc[counter]:
s = ''
if key == base:
for n in enzymemap[key]:
- s = ' '.join((s,n))
+ s = ' '.join((s, n))
l = line[0:59]
lineo = Join((l, str(key), s, '\n'))
line2 = Join((l, a, '\n'))
@@ -338,17 +341,17 @@
map = Join((map, linetot))
break
for n in enzymemap[key]:
- s = ' '.join((s,n))
+ s = ' '.join((s, n))
k = key%60
lineo = Join((line[0:(k-1)], str(key), s, '\n'))
line = Join((line[0:(k-1)], a, line[k:]))
line2 = Join((line[0:(k-1)], a, line[k:], '\n'))
- linetot = Join((lineo,line2))
- map = Join((map,linetot))
- mapunit = '\n'.join((sequence[counter : base],a * 60,
+ linetot = Join((lineo, line2))
+ map = Join((map, linetot))
+ mapunit = '\n'.join((sequence[counter : base], a * 60,
revsequence[counter : base],
Join((str.ljust(str(counter+1), 15), ' '* 30,
- str.rjust(str(base), 15),'\n\n'))
+ str.rjust(str(base), 15), '\n\n'))
))
map = Join((map, mapunit))
line = ' '* 60
@@ -356,29 +359,29 @@
s = ''
if key == length:
for n in enzymemap[key]:
- s = Join((s,' ',n))
+ s = Join((s, ' ', n))
l = line[0:(length-1)]
- lineo = Join((l,str(key),s,'\n'))
- line2 = Join((l,a,'\n'))
+ lineo = Join((l, str(key), s, '\n'))
+ line2 = Join((l, a, '\n'))
linetot = Join((lineo, line2))
map = Join((map, linetot))
break
for n in enzymemap[key]:
- s = Join((s,' ',n))
+ s = Join((s, ' ', n))
k = key%60
- lineo = Join((line[0:(k-1)],str(key),s,'\n'))
- line = Join((line[0:(k-1)],a,line[k:]))
- line2 = Join((line[0:(k-1)],a,line[k:],'\n'))
- linetot = Join((lineo,line2))
- map = Join((map,linetot))
+ lineo = Join((line[0:(k-1)], str(key), s, '\n'))
+ line = Join((line[0:(k-1)], a, line[k:]))
+ line2 = Join((line[0:(k-1)], a, line[k:], '\n'))
+ linetot = Join((lineo, line2))
+ map = Join((map, linetot))
mapunit = ''
mapunit = Join((sequence[base : length], '\n'))
mapunit = Join((mapunit, a * (length-base), '\n'))
- mapunit = Join((mapunit,revsequence[base:length], '\n'))
+ mapunit = Join((mapunit, revsequence[base:length], '\n'))
mapunit = Join((mapunit, Join((str.ljust(str(base+1), 15), ' '*(
- length-base-30),str.rjust(str(length), 15),
+ length-base-30), str.rjust(str(length), 15),
'\n\n'))))
- map = Join((map,mapunit))
+ map = Join((map, mapunit))
return map
###### private method to do lists:
@@ -404,7 +407,7 @@
several, Join = '', ''.join
for name, sites in ls:
stringsite = ''
- l = Join((', '.join([str(site) for site in sites]), '.'))
+ l = Join((', '.join(str(site) for site in sites), '.'))
if len(l) > linesize:
#
# cut where appropriate and add the indentation
@@ -414,5 +417,6 @@
else:
stringsite = l
into = Join((into,
- str(name).ljust(self.NameWidth),' : ',stringsite,'\n'))
+ str(name).ljust(self.NameWidth), ' : ', stringsite, '\n'))
return into
+
diff -Nru python-biopython-1.62/Bio/Restriction/Restriction.py python-biopython-1.63/Bio/Restriction/Restriction.py
--- python-biopython-1.62/Bio/Restriction/Restriction.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Restriction/Restriction.py 2013-12-05 14:10:43.000000000 +0000
@@ -78,6 +78,11 @@
----------------------------------------------------------------------------
"""
+from __future__ import print_function
+from Bio._py3k import zip
+from Bio._py3k import filter
+from Bio._py3k import range
+
import re
import itertools
@@ -247,7 +252,7 @@
# super(RestrictionType, cls).__init__(cls, name, bases, dct)
try :
cls.compsite = re.compile(cls.compsite)
- except Exception, err :
+ except Exception as err :
raise ValueError("Problem with regular expression, re.compiled(%s)"
% repr(cls.compsite))
@@ -473,9 +478,8 @@
@classmethod
def all_suppliers(self):
"""RE.all_suppliers -> print all the suppliers of R"""
- supply = [x[0] for x in suppliers_dict.itervalues()]
- supply.sort()
- print ",\n".join(supply)
+ supply = sorted(x[0] for x in suppliers_dict.values())
+ print(",\n".join(supply))
return
@classmethod
@@ -534,8 +538,7 @@
neoschizomer <=> same site, different position of restriction."""
if not batch:
batch = AllEnzymes
- r = [x for x in batch if self >> x]
- r.sort()
+ r = sorted(x for x in batch if self >> x)
return r
@classmethod
@@ -833,8 +836,8 @@
implement the search method for palindromic and non palindromic enzyme.
"""
- siteloc = self.dna.finditer(self.compsite,self.size)
- self.results = [r for s,g in siteloc for r in self._modify(s)]
+ siteloc = self.dna.finditer(self.compsite, self.size)
+ self.results = [r for s, g in siteloc for r in self._modify(s)]
if self.results:
self._drop()
return self.results
@@ -1008,7 +1011,7 @@
#
# if more than one site add them.
#
- fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
+ fragments += [d[r[x]:r[x+1]] for x in range(length)]
#
# LAST site to END of the sequence.
#
@@ -1026,7 +1029,7 @@
#
# add the others.
#
- fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
+ fragments += [d[r[x]:r[x+1]] for x in range(length)]
return tuple(fragments)
catalyze = catalyse
@@ -1080,8 +1083,7 @@
list of all the enzymes that share compatible end with RE."""
if not batch:
batch = AllEnzymes
- r = [x for x in iter(AllEnzymes) if x.is_blunt()]
- r.sort()
+ r = sorted(x for x in iter(AllEnzymes) if x.is_blunt())
return r
@staticmethod
@@ -1129,7 +1131,7 @@
#
# if more than one site add them.
#
- fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
+ fragments += [d[r[x]:r[x+1]] for x in range(length)]
#
# LAST site to END of the sequence.
#
@@ -1147,7 +1149,7 @@
#
# add the others.
#
- fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
+ fragments += [d[r[x]:r[x+1]] for x in range(length)]
return tuple(fragments)
catalyze = catalyse
@@ -1201,8 +1203,7 @@
list of all the enzymes that share compatible end with RE."""
if not batch:
batch = AllEnzymes
- r = [x for x in iter(AllEnzymes) if x.is_5overhang() and x % self]
- r.sort()
+ r = sorted(x for x in iter(AllEnzymes) if x.is_5overhang() and x % self)
return r
@classmethod
@@ -1253,7 +1254,7 @@
#
# if more than one site add them.
#
- fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
+ fragments += [d[r[x]:r[x+1]] for x in range(length)]
#
# LAST site to END of the sequence.
#
@@ -1271,7 +1272,7 @@
#
# add the others.
#
- fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
+ fragments += [d[r[x]:r[x+1]] for x in range(length)]
return tuple(fragments)
catalyze = catalyse
@@ -1325,8 +1326,7 @@
list of all the enzymes that share compatible end with RE."""
if not batch:
batch = AllEnzymes
- r = [x for x in iter(AllEnzymes) if x.is_3overhang() and x % self]
- r.sort()
+ r = sorted(x for x in iter(AllEnzymes) if x.is_3overhang() and x % self)
return r
@classmethod
@@ -1635,7 +1635,7 @@
re = site + (f5-length)*'N' + '^_N'
else:
raise ValueError('%s.easyrepr() : error f5=%i'
- % (self.name,f5))
+ % (self.name, f5))
else:
if f3 == 0:
if f5 == 0:
@@ -1776,10 +1776,8 @@
@classmethod
def suppliers(self):
"""RE.suppliers() -> print the suppliers of RE."""
- supply = suppliers_dict.items()
- for k,v in supply:
- if k in self.suppl:
- print v[0]+','
+ for s in self.suppliers_dict():
+ print(s + ',')
return
@classmethod
@@ -1787,7 +1785,7 @@
"""RE.supplier_list() -> list.
list of the supplier names for RE."""
- return [v[0] for k,v in suppliers_dict.items() if k in self.suppl]
+ return [v[0] for k, v in suppliers_dict.items() if k in self.suppl]
@classmethod
def buffers(self, supplier):
@@ -1899,7 +1897,7 @@
the new batch will contains only the enzymes for which
func return True."""
- d = [x for x in itertools.ifilter(func, self)]
+ d = [x for x in filter(func, self)]
new = RestrictionBatch()
new._data = dict(zip(d, [True]*len(d)))
return new
@@ -1922,8 +1920,7 @@
return a sorted list of the suppliers which have been used to
create the batch."""
- suppl_list = [suppliers_dict[x][0] for x in self.suppliers]
- suppl_list.sort()
+ suppl_list = sorted(suppliers_dict[x][0] for x in self.suppliers)
return suppl_list
def __iadd__(self, other):
@@ -2002,7 +1999,7 @@
else:
continue
return True
- d = [k for k in itertools.ifilter(splittest, self)]
+ d = [k for k in filter(splittest, self)]
new = RestrictionBatch()
new._data = dict(zip(d, [True]*len(d)))
return new
@@ -2011,8 +2008,7 @@
"""B.elements() -> tuple.
give all the names of the enzymes in B sorted alphabetically."""
- l = [str(e) for e in self]
- l.sort()
+ l = sorted(str(e) for e in self)
return l
def as_string(self):
@@ -2026,14 +2022,14 @@
"""B.suppl_codes() -> dict
letter code for the suppliers"""
- supply = dict([(k,v[0]) for k,v in suppliers_dict.iteritems()])
+ supply = dict((k, v[0]) for k, v in suppliers_dict.items())
return supply
@classmethod
def show_codes(self):
"""B.show_codes() -> letter codes for the suppliers"""
- supply = [' = '.join(i) for i in self.suppl_codes().iteritems()]
- print '\n'.join(supply)
+ supply = [' = '.join(i) for i in self.suppl_codes().items()]
+ print('\n'.join(supply))
return
def search(self, dna, linear=True):
@@ -2056,14 +2052,14 @@
else:
self.already_mapped = str(dna), linear
fseq = FormattedSeq(dna, linear)
- self.mapping = dict([(x, x.search(fseq)) for x in self])
+ self.mapping = dict((x, x.search(fseq)) for x in self)
return self.mapping
elif isinstance(dna, FormattedSeq):
if (str(dna), dna.linear) == self.already_mapped:
return self.mapping
else:
self.already_mapped = str(dna), dna.linear
- self.mapping = dict([(x, x.search(dna)) for x in self])
+ self.mapping = dict((x, x.search(dna)) for x in self)
return self.mapping
raise TypeError("Expected Seq or MutableSeq instance, got %s instead"
%type(dna))
@@ -2094,7 +2090,7 @@
def __repr__(self):
return 'Analysis(%s,%s,%s)'%\
- (repr(self.rb),repr(self.sequence),self.linear)
+ (repr(self.rb), repr(self.sequence), self.linear)
def _sub_set(self, wanted):
"""A._sub_set(other_set) -> dict.
@@ -2104,7 +2100,7 @@
screen the results through wanted set.
Keep only the results for which the enzymes is in wanted set.
"""
- return dict([(k,v) for k,v in self.mapping.iteritems() if k in wanted])
+ return dict((k, v) for k, v in self.mapping.items() if k in wanted)
def _boundaries(self, start, end):
"""A._boundaries(start, end) -> tuple.
@@ -2154,7 +2150,7 @@
"""
if not dct:
dct = self.mapping
- print
+ print("")
return PrintFormat.print_that(self, dct, title, s1)
def change(self, **what):
@@ -2168,7 +2164,7 @@
you expect. In which case, you can settle back to a 80 columns shell
or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
you get it right."""
- for k,v in what.iteritems():
+ for k, v in what.items():
if k in ('NameWidth', 'ConsoleWidth'):
setattr(self, k, v)
self.Cmodulo = self.ConsoleWidth % self.NameWidth
@@ -2204,7 +2200,7 @@
Only the enzymes which have a 3'overhang restriction site."""
if not dct:
dct = self.mapping
- return dict([(k,v) for k,v in dct.iteritems() if k.is_blunt()])
+ return dict((k, v) for k, v in dct.items() if k.is_blunt())
def overhang5(self, dct=None):
"""A.overhang5([dct]) -> dict.
@@ -2212,7 +2208,7 @@
Only the enzymes which have a 5' overhang restriction site."""
if not dct:
dct = self.mapping
- return dict([(k,v) for k,v in dct.iteritems() if k.is_5overhang()])
+ return dict((k, v) for k, v in dct.items() if k.is_5overhang())
def overhang3(self, dct=None):
"""A.Overhang3([dct]) -> dict.
@@ -2220,7 +2216,7 @@
Only the enzymes which have a 3'overhang restriction site."""
if not dct:
dct = self.mapping
- return dict([(k,v) for k,v in dct.iteritems() if k.is_3overhang()])
+ return dict((k, v) for k, v in dct.items() if k.is_3overhang())
def defined(self, dct=None):
"""A.defined([dct]) -> dict.
@@ -2228,7 +2224,7 @@
Only the enzymes that have a defined restriction site in Rebase."""
if not dct:
dct = self.mapping
- return dict([(k,v) for k,v in dct.iteritems() if k.is_defined()])
+ return dict((k, v) for k, v in dct.items() if k.is_defined())
def with_sites(self, dct=None):
"""A.with_sites([dct]) -> dict.
@@ -2236,7 +2232,7 @@
Enzymes which have at least one site in the sequence."""
if not dct:
dct = self.mapping
- return dict([(k,v) for k,v in dct.iteritems() if v])
+ return dict((k, v) for k, v in dct.items() if v)
def without_site(self, dct=None):
"""A.without_site([dct]) -> dict.
@@ -2244,7 +2240,7 @@
Enzymes which have no site in the sequence."""
if not dct:
dct = self.mapping
- return dict([(k,v) for k,v in dct.iteritems() if not v])
+ return dict((k, v) for k, v in dct.items() if not v)
def with_N_sites(self, N, dct=None):
"""A.With_N_Sites(N [, dct]) -> dict.
@@ -2252,12 +2248,12 @@
Enzymes which cut N times the sequence."""
if not dct:
dct = self.mapping
- return dict([(k,v) for k,v in dct.iteritems()if len(v) == N])
+ return dict((k, v) for k, v in dct.items()if len(v) == N)
def with_number_list(self, list, dct= None):
if not dct:
dct = self.mapping
- return dict([(k,v) for k,v in dct.iteritems() if len(v) in list])
+ return dict((k, v) for k, v in dct.items() if len(v) in list)
def with_name(self, names, dct=None):
"""A.with_name(list_of_names [, dct]) ->
@@ -2265,11 +2261,11 @@
Limit the search to the enzymes named in list_of_names."""
for i, enzyme in enumerate(names):
if not enzyme in AllEnzymes:
- print "no data for the enzyme:", str(name)
+ print("no data for the enzyme: %s" % name)
del names[i]
if not dct:
return RestrictionBatch(names).search(self.sequence)
- return dict([(n, dct[n]) for n in names if n in dct])
+ return dict((n, dct[n]) for n in names if n in dct)
def with_site_size(self, site_size, dct=None):
"""A.with_site_size(site_size [, dct]) ->
@@ -2278,7 +2274,7 @@
sites = [name for name in self if name.size == site_size]
if not dct:
return RestrictionBatch(sites).search(self.sequence)
- return dict([(k,v) for k,v in dct.iteritems() if k in site_size])
+ return dict((k, v) for k, v in dct.items() if k in site_size)
def only_between(self, start, end, dct=None):
"""A.only_between(start, end[, dct]) -> dict.
@@ -2288,7 +2284,7 @@
if not dct:
dct = self.mapping
d = dict(dct)
- for key, sites in dct.iteritems():
+ for key, sites in dct.items():
if not sites:
del d[key]
continue
@@ -2309,7 +2305,7 @@
d = {}
if not dct:
dct = self.mapping
- for key, sites in dct.iteritems():
+ for key, sites in dct.items():
for site in sites:
if test(start, end, site):
d[key] = sites
@@ -2340,7 +2336,7 @@
if not dct:
dct = self.mapping
d = dict(dct)
- for key, sites in dct.iteritems():
+ for key, sites in dct.items():
if not sites:
del d[key]
continue
@@ -2361,7 +2357,7 @@
if not dct:
dct = self.mapping
d = {}
- for key, sites in dct.iteritems():
+ for key, sites in dct.items():
for site in sites:
if test(start, end, site):
continue
@@ -2404,7 +2400,7 @@
#
CommOnly = RestrictionBatch() # commercial enzymes
NonComm = RestrictionBatch() # not available commercially
-for TYPE, (bases, enzymes) in typedict.iteritems():
+for TYPE, (bases, enzymes) in typedict.items():
#
# The keys are the pseudo-types TYPE (stored as type1, type2...)
# The names are not important and are only present to differentiate
@@ -2422,7 +2418,7 @@
#
# First eval the bases.
#
- bases = tuple([eval(x) for x in bases])
+ bases = tuple(eval(x) for x in bases)
#
# now create the particular value of RestrictionType for the classes
# in enzymes.
@@ -2458,5 +2454,5 @@
#Scoping changed in Python 3, the variable isn't leaked
pass
locals().update(dict(zip(names, AllEnzymes)))
-__all__=['FormattedSeq', 'Analysis', 'RestrictionBatch','AllEnzymes','CommOnly','NonComm']+names
+__all__=['FormattedSeq', 'Analysis', 'RestrictionBatch', 'AllEnzymes', 'CommOnly', 'NonComm']+names
del k, enzymes, TYPE, bases, names
diff -Nru python-biopython-1.62/Bio/Restriction/Restriction_Dictionary.py python-biopython-1.63/Bio/Restriction/Restriction_Dictionary.py
--- python-biopython-1.62/Bio/Restriction/Restriction_Dictionary.py 2013-08-28 21:34:02.000000000 +0000
+++ python-biopython-1.63/Bio/Restriction/Restriction_Dictionary.py 2013-12-05 14:10:43.000000000 +0000
@@ -20,16610 +20,17153 @@
rest_dict = {}
def _temp():
return {
- 'compsite' : '(?P<AanI>TTATAA)|(?P<AanI_as>TTATAA)',
- 'results' : None,
- 'site' : 'TTATAA',
- 'substrat' : 'DNA',
- 'fst3' : -3,
- 'fst5' : 3,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 0,
- 'scd3' : None,
- 'suppl' : ('F',),
- 'scd5' : None,
- 'charac' : (3, -3, None, None, 'TTATAA'),
- 'ovhgseq' : '',
+ 'compsite': '(?P<AanI>TTATAA)',
+ 'results': None,
+ 'site': 'TTATAA',
+ 'substrat': 'DNA',
+ 'fst3': -3,
+ 'fst5': 3,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 0,
+ 'scd3': None,
+ 'suppl': ('F',),
+ 'scd5': None,
+ 'charac': (3, -3, None, None, 'TTATAA'),
+ 'ovhgseq': '',
}
rest_dict['AanI'] = _temp()
def _temp():
return {
- 'compsite' : '(?P<AarI>CACCTGC)|(?P<AarI_as>GCAGGTG)',
- 'results' : None,
- 'site' : 'CACCTGC',
- 'substrat' : 'DNA',
- 'fst3' : 8,
- 'fst5' : 11,
- 'freq' : 16384,
- 'size' : 7,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : ('F',),
- 'scd5' : None,
- 'charac' : (11, 8, None, None, 'CACCTGC'),
- 'ovhgseq' : 'NNNN',
+ 'compsite': '(?P<AarI>CACCTGC)|(?P<AarI_as>GCAGGTG)',
+ 'results': None,
+ 'site': 'CACCTGC',
+ 'substrat': 'DNA',
+ 'fst3': 8,
+ 'fst5': 11,
+ 'freq': 16384,
+ 'size': 7,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': ('F',),
+ 'scd5': None,
+ 'charac': (11, 8, None, None, 'CACCTGC'),
+ 'ovhgseq': 'NNNN',
}
rest_dict['AarI'] = _temp()
def _temp():
return {
- 'compsite' : '(?P<AasI>GAC......GTC)|(?P<AasI_as>GAC......GTC)',
- 'results' : None,
- 'site' : 'GACNNNNNNGTC',
- 'substrat' : 'DNA',
- 'fst3' : -7,
- 'fst5' : 7,
- 'freq' : 4096,
- 'size' : 12,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 2,
- 'scd3' : None,
- 'suppl' : ('F',),
- 'scd5' : None,
- 'charac' : (7, -7, None, None, 'GACNNNNNNGTC'),
- 'ovhgseq' : 'NN',
+ 'compsite': '(?P<AasI>GAC......GTC)',
+ 'results': None,
+ 'site': 'GACNNNNNNGTC',
+ 'substrat': 'DNA',
+ 'fst3': -7,
+ 'fst5': 7,
+ 'freq': 4096,
+ 'size': 12,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 2,
+ 'scd3': None,
+ 'suppl': ('F',),
+ 'scd5': None,
+ 'charac': (7, -7, None, None, 'GACNNNNNNGTC'),
+ 'ovhgseq': 'NN',
}
rest_dict['AasI'] = _temp()
def _temp():
return {
- 'compsite' : '(?P<AatI>AGGCCT)|(?P<AatI_as>AGGCCT)',
- 'results' : None,
- 'site' : 'AGGCCT',
- 'substrat' : 'DNA',
- 'fst3' : -3,
- 'fst5' : 3,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 0,
- 'scd3' : None,
- 'suppl' : ('O',),
- 'scd5' : None,
- 'charac' : (3, -3, None, None, 'AGGCCT'),
- 'ovhgseq' : '',
- }
-rest_dict['AatI'] = _temp()
+ 'compsite': '(?P<AatII>GACGTC)',
+ 'results': None,
+ 'site': 'GACGTC',
+ 'substrat': 'DNA',
+ 'fst3': -5,
+ 'fst5': 5,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 4,
+ 'scd3': None,
+ 'suppl': ('F', 'I', 'K', 'M', 'N', 'R'),
+ 'scd5': None,
+ 'charac': (5, -5, None, None, 'GACGTC'),
+ 'ovhgseq': 'ACGT',
+ }
+rest_dict['AatII'] = _temp()
def _temp():
return {
- 'compsite' : '(?P<AatII>GACGTC)|(?P<AatII_as>GACGTC)',
- 'results' : None,
- 'site' : 'GACGTC',
- 'substrat' : 'DNA',
- 'fst3' : -5,
- 'fst5' : 5,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 4,
- 'scd3' : None,
- 'suppl' : ('F', 'I', 'K', 'M', 'N', 'O', 'R', 'V'),
- 'scd5' : None,
- 'charac' : (5, -5, None, None, 'GACGTC'),
- 'ovhgseq' : 'ACGT',
- }
-rest_dict['AatII'] = _temp()
+ 'compsite': '(?P<AbaSI>C)|(?P<AbaSI_as>G)',
+ 'results': None,
+ 'site': 'C',
+ 'substrat': 'DNA',
+ 'fst3': 9,
+ 'fst5': 12,
+ 'freq': 4,
+ 'size': 1,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 2,
+ 'scd3': None,
+ 'suppl': ('N',),
+ 'scd5': None,
+ 'charac': (12, 9, None, None, 'C'),
+ 'ovhgseq': 'NN',
+ }
+rest_dict['AbaSI'] = _temp()
def _temp():
return {
- 'compsite' : '(?P<AbsI>CCTCGAGG)|(?P<AbsI_as>CCTCGAGG)',
- 'results' : None,
- 'site' : 'CCTCGAGG',
- 'substrat' : 'DNA',
- 'fst3' : -2,
- 'fst5' : 2,
- 'freq' : 65536,
- 'size' : 8,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : ('I',),
- 'scd5' : None,
- 'charac' : (2, -2, None, None, 'CCTCGAGG'),
- 'ovhgseq' : 'TCGA',
+ 'compsite': '(?P<AbsI>CCTCGAGG)',
+ 'results': None,
+ 'site': 'CCTCGAGG',
+ 'substrat': 'DNA',
+ 'fst3': -2,
+ 'fst5': 2,
+ 'freq': 65536,
+ 'size': 8,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': ('I',),
+ 'scd5': None,
+ 'charac': (2, -2, None, None, 'CCTCGAGG'),
+ 'ovhgseq': 'TCGA',
}
rest_dict['AbsI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PTGCGCA)|(?PTGCGCA)',
- 'results' : None,
- 'site' : 'TGCGCA',
- 'substrat' : 'DNA',
- 'fst3' : -3,
- 'fst5' : 3,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 0,
- 'scd3' : None,
- 'suppl' : ('I', 'V'),
- 'scd5' : None,
- 'charac' : (3, -3, None, None, 'TGCGCA'),
- 'ovhgseq' : '',
+ 'compsite': '(?PTGCGCA)',
+ 'results': None,
+ 'site': 'TGCGCA',
+ 'substrat': 'DNA',
+ 'fst3': -3,
+ 'fst5': 3,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 0,
+ 'scd3': None,
+ 'suppl': ('I',),
+ 'scd5': None,
+ 'charac': (3, -3, None, None, 'TGCGCA'),
+ 'ovhgseq': '',
}
rest_dict['Acc16I'] = _temp()
def _temp():
return {
- 'compsite' : '(?P<Acc36I>ACCTGC)|(?P<Acc36I_as>GCAGGT)',
- 'results' : None,
- 'site' : 'ACCTGC',
- 'substrat' : 'DNA',
- 'fst3' : 8,
- 'fst5' : 10,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : ('I',),
- 'scd5' : None,
- 'charac' : (10, 8, None, None, 'ACCTGC'),
- 'ovhgseq' : 'NNNN',
+ 'compsite': '(?P<Acc36I>ACCTGC)|(?P<Acc36I_as>GCAGGT)',
+ 'results': None,
+ 'site': 'ACCTGC',
+ 'substrat': 'DNA',
+ 'fst3': 8,
+ 'fst5': 10,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': ('I',),
+ 'scd5': None,
+ 'charac': (10, 8, None, None, 'ACCTGC'),
+ 'ovhgseq': 'NNNN',
}
rest_dict['Acc36I'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGGTACC)|(?PGGTACC)',
- 'results' : None,
- 'site' : 'GGTACC',
- 'substrat' : 'DNA',
- 'fst3' : -1,
- 'fst5' : 1,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : ('F', 'I', 'N', 'R', 'V', 'W'),
- 'scd5' : None,
- 'charac' : (1, -1, None, None, 'GGTACC'),
- 'ovhgseq' : 'GTAC',
+ 'compsite': '(?PGGTACC)',
+ 'results': None,
+ 'site': 'GGTACC',
+ 'substrat': 'DNA',
+ 'fst3': -1,
+ 'fst5': 1,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': ('F', 'I', 'N', 'R'),
+ 'scd5': None,
+ 'charac': (1, -1, None, None, 'GGTACC'),
+ 'ovhgseq': 'GTAC',
}
rest_dict['Acc65I'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGG[CT][AG]CC)|(?PGG[CT][AG]CC)',
- 'results' : None,
- 'site' : 'GGYRCC',
- 'substrat' : 'DNA',
- 'fst3' : -1,
- 'fst5' : 1,
- 'freq' : 1024,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : ('I', 'V'),
- 'scd5' : None,
- 'charac' : (1, -1, None, None, 'GGYRCC'),
- 'ovhgseq' : 'GYRC',
+ 'compsite': '(?PGG[CT][AG]CC)',
+ 'results': None,
+ 'site': 'GGYRCC',
+ 'substrat': 'DNA',
+ 'fst3': -1,
+ 'fst5': 1,
+ 'freq': 1024,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': ('I',),
+ 'scd5': None,
+ 'charac': (1, -1, None, None, 'GGYRCC'),
+ 'ovhgseq': 'GYRC',
}
rest_dict['AccB1I'] = _temp()
def _temp():
return {
- 'compsite' : '(?PCCA.....TGG)|(?PCCA.....TGG)',
- 'results' : None,
- 'site' : 'CCANNNNNTGG',
- 'substrat' : 'DNA',
- 'fst3' : -7,
- 'fst5' : 7,
- 'freq' : 4096,
- 'size' : 11,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 3,
- 'scd3' : None,
- 'suppl' : ('I', 'R', 'V'),
- 'scd5' : None,
- 'charac' : (7, -7, None, None, 'CCANNNNNTGG'),
- 'ovhgseq' : 'NNN',
+ 'compsite': '(?PCCA.....TGG)',
+ 'results': None,
+ 'site': 'CCANNNNNTGG',
+ 'substrat': 'DNA',
+ 'fst3': -7,
+ 'fst5': 7,
+ 'freq': 4096,
+ 'size': 11,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 3,
+ 'scd3': None,
+ 'suppl': ('I',),
+ 'scd5': None,
+ 'charac': (7, -7, None, None, 'CCANNNNNTGG'),
+ 'ovhgseq': 'NNN',
}
rest_dict['AccB7I'] = _temp()
def _temp():
return {
- 'compsite' : '(?PCCGCTC)|(?PGAGCGG)',
- 'results' : None,
- 'site' : 'CCGCTC',
- 'substrat' : 'DNA',
- 'fst3' : -3,
- 'fst5' : 3,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 0,
- 'scd3' : None,
- 'suppl' : ('I', 'V'),
- 'scd5' : None,
- 'charac' : (3, -3, None, None, 'CCGCTC'),
- 'ovhgseq' : '',
+ 'compsite': '(?PCCGCTC)|(?PGAGCGG)',
+ 'results': None,
+ 'site': 'CCGCTC',
+ 'substrat': 'DNA',
+ 'fst3': -3,
+ 'fst5': 3,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 0,
+ 'scd3': None,
+ 'suppl': ('I',),
+ 'scd5': None,
+ 'charac': (3, -3, None, None, 'CCGCTC'),
+ 'ovhgseq': '',
}
rest_dict['AccBSI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGT[AC][GT]AC)|(?PGT[AC][GT]AC)',
- 'results' : None,
- 'site' : 'GTMKAC',
- 'substrat' : 'DNA',
- 'fst3' : -2,
- 'fst5' : 2,
- 'freq' : 1024,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -2,
- 'scd3' : None,
- 'suppl' : ('B', 'J', 'K', 'M', 'N', 'O', 'R', 'S', 'U', 'W', 'X'),
- 'scd5' : None,
- 'charac' : (2, -2, None, None, 'GTMKAC'),
- 'ovhgseq' : 'MK',
+ 'compsite': '(?PGT[AC][GT]AC)',
+ 'results': None,
+ 'site': 'GTMKAC',
+ 'substrat': 'DNA',
+ 'fst3': -2,
+ 'fst5': 2,
+ 'freq': 1024,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -2,
+ 'scd3': None,
+ 'suppl': ('B', 'J', 'K', 'M', 'N', 'Q', 'R', 'S', 'U', 'X'),
+ 'scd5': None,
+ 'charac': (2, -2, None, None, 'GTMKAC'),
+ 'ovhgseq': 'MK',
}
rest_dict['AccI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PCGCG)|(?PCGCG)',
- 'results' : None,
- 'site' : 'CGCG',
- 'substrat' : 'DNA',
- 'fst3' : -2,
- 'fst5' : 2,
- 'freq' : 256,
- 'size' : 4,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 0,
- 'scd3' : None,
- 'suppl' : ('J', 'K'),
- 'scd5' : None,
- 'charac' : (2, -2, None, None, 'CGCG'),
- 'ovhgseq' : '',
+ 'compsite': '(?PCGCG)',
+ 'results': None,
+ 'site': 'CGCG',
+ 'substrat': 'DNA',
+ 'fst3': -2,
+ 'fst5': 2,
+ 'freq': 256,
+ 'size': 4,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 0,
+ 'scd3': None,
+ 'suppl': ('J', 'K'),
+ 'scd5': None,
+ 'charac': (2, -2, None, None, 'CGCG'),
+ 'ovhgseq': '',
}
rest_dict['AccII'] = _temp()
def _temp():
return {
- 'compsite' : '(?PTCCGGA)|(?PTCCGGA)',
- 'results' : None,
- 'site' : 'TCCGGA',
- 'substrat' : 'DNA',
- 'fst3' : -1,
- 'fst5' : 1,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : ('J', 'K', 'R', 'W'),
- 'scd5' : None,
- 'charac' : (1, -1, None, None, 'TCCGGA'),
- 'ovhgseq' : 'CCGG',
+ 'compsite': '(?PTCCGGA)',
+ 'results': None,
+ 'site': 'TCCGGA',
+ 'substrat': 'DNA',
+ 'fst3': -1,
+ 'fst5': 1,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': ('J', 'K', 'R'),
+ 'scd5': None,
+ 'charac': (1, -1, None, None, 'TCCGGA'),
+ 'ovhgseq': 'CCGG',
}
rest_dict['AccIII'] = _temp()
def _temp():
return {
- 'compsite' : '(?PCAGCTC)|(?PGAGCTG)',
- 'results' : None,
- 'site' : 'CAGCTC',
- 'substrat' : 'DNA',
- 'fst3' : 11,
- 'fst5' : 13,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : (),
- 'scd5' : None,
- 'charac' : (13, 11, None, None, 'CAGCTC'),
- 'ovhgseq' : 'NNNN',
+ 'compsite': '(?PCAGCTC)|(?PGAGCTG)',
+ 'results': None,
+ 'site': 'CAGCTC',
+ 'substrat': 'DNA',
+ 'fst3': 11,
+ 'fst5': 13,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': (),
+ 'scd5': None,
+ 'charac': (13, 11, None, None, 'CAGCTC'),
+ 'ovhgseq': 'NNNN',
}
rest_dict['AceIII'] = _temp()
def _temp():
return {
- 'compsite' : '(?PCCGC)|(?PGCGG)',
- 'results' : None,
- 'site' : 'CCGC',
- 'substrat' : 'DNA',
- 'fst3' : -1,
- 'fst5' : 1,
- 'freq' : 256,
- 'size' : 4,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -2,
- 'scd3' : None,
- 'suppl' : ('N',),
- 'scd5' : None,
- 'charac' : (1, -1, None, None, 'CCGC'),
- 'ovhgseq' : 'CG',
+ 'compsite': '(?PCCGC)|(?PGCGG)',
+ 'results': None,
+ 'site': 'CCGC',
+ 'substrat': 'DNA',
+ 'fst3': -1,
+ 'fst5': 1,
+ 'freq': 256,
+ 'size': 4,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -2,
+ 'scd3': None,
+ 'suppl': ('N',),
+ 'scd5': None,
+ 'charac': (1, -1, None, None, 'CCGC'),
+ 'ovhgseq': 'CG',
}
rest_dict['AciI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PAACGTT)|(?PAACGTT)',
- 'results' : None,
- 'site' : 'AACGTT',
- 'substrat' : 'DNA',
- 'fst3' : -2,
- 'fst5' : 2,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -2,
- 'scd3' : None,
- 'suppl' : ('I', 'N', 'V'),
- 'scd5' : None,
- 'charac' : (2, -2, None, None, 'AACGTT'),
- 'ovhgseq' : 'CG',
+ 'compsite': '(?PAACGTT)',
+ 'results': None,
+ 'site': 'AACGTT',
+ 'substrat': 'DNA',
+ 'fst3': -2,
+ 'fst5': 2,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -2,
+ 'scd3': None,
+ 'suppl': ('I', 'N'),
+ 'scd5': None,
+ 'charac': (2, -2, None, None, 'AACGTT'),
+ 'ovhgseq': 'CG',
}
rest_dict['AclI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGGATC)|(?PGATCC)',
- 'results' : None,
- 'site' : 'GGATC',
- 'substrat' : 'DNA',
- 'fst3' : 5,
- 'fst5' : 9,
- 'freq' : 1024,
- 'size' : 5,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -1,
- 'scd3' : None,
- 'suppl' : ('I',),
- 'scd5' : None,
- 'charac' : (9, 5, None, None, 'GGATC'),
- 'ovhgseq' : 'N',
+ 'compsite': '(?PGGATC)|(?PGATCC)',
+ 'results': None,
+ 'site': 'GGATC',
+ 'substrat': 'DNA',
+ 'fst3': 5,
+ 'fst5': 9,
+ 'freq': 1024,
+ 'size': 5,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -1,
+ 'scd3': None,
+ 'suppl': ('I',),
+ 'scd5': None,
+ 'charac': (9, 5, None, None, 'GGATC'),
+ 'ovhgseq': 'N',
}
rest_dict['AclWI'] = _temp()
def _temp():
return {
- 'compsite' : '(?P[CT]GGCC[AG])|(?P[CT]GGCC[AG])',
- 'results' : None,
- 'site' : 'YGGCCR',
- 'substrat' : 'DNA',
- 'fst3' : -1,
- 'fst5' : 1,
- 'freq' : 1024,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : ('I',),
- 'scd5' : None,
- 'charac' : (1, -1, None, None, 'YGGCCR'),
- 'ovhgseq' : 'GGCC',
+ 'compsite': '(?P[CT]GGCC[AG])',
+ 'results': None,
+ 'site': 'YGGCCR',
+ 'substrat': 'DNA',
+ 'fst3': -1,
+ 'fst5': 1,
+ 'freq': 1024,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': ('I',),
+ 'scd5': None,
+ 'charac': (1, -1, None, None, 'YGGCCR'),
+ 'ovhgseq': 'GGCC',
}
rest_dict['AcoI'] = _temp()
def _temp():
return {
- 'compsite' : '(?P[AG]AATT[CT])|(?P[AG]AATT[CT])',
- 'results' : None,
- 'site' : 'RAATTY',
- 'substrat' : 'DNA',
- 'fst3' : -1,
- 'fst5' : 1,
- 'freq' : 1024,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : ('I', 'V'),
- 'scd5' : None,
- 'charac' : (1, -1, None, None, 'RAATTY'),
- 'ovhgseq' : 'AATT',
+ 'compsite': '(?P[AG]AATT[CT])',
+ 'results': None,
+ 'site': 'RAATTY',
+ 'substrat': 'DNA',
+ 'fst3': -1,
+ 'fst5': 1,
+ 'freq': 1024,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': ('I',),
+ 'scd5': None,
+ 'charac': (1, -1, None, None, 'RAATTY'),
+ 'ovhgseq': 'AATT',
}
rest_dict['AcsI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PCTGAAG)|(?PCTTCAG)',
- 'results' : None,
- 'site' : 'CTGAAG',
- 'substrat' : 'DNA',
- 'fst3' : 14,
- 'fst5' : 22,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 2,
- 'scd3' : None,
- 'suppl' : ('I', 'N'),
- 'scd5' : None,
- 'charac' : (22, 14, None, None, 'CTGAAG'),
- 'ovhgseq' : 'NN',
+ 'compsite': '(?PCTGAAG)|(?PCTTCAG)',
+ 'results': None,
+ 'site': 'CTGAAG',
+ 'substrat': 'DNA',
+ 'fst3': 14,
+ 'fst5': 22,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 2,
+ 'scd3': None,
+ 'suppl': ('I', 'N'),
+ 'scd5': None,
+ 'charac': (22, 14, None, None, 'CTGAAG'),
+ 'ovhgseq': 'NN',
}
rest_dict['AcuI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PCACGTG)|(?PCACGTG)',
- 'results' : None,
- 'site' : 'CACGTG',
- 'substrat' : 'DNA',
- 'fst3' : -3,
- 'fst5' : 3,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 0,
- 'scd3' : None,
- 'suppl' : ('Q', 'X'),
- 'scd5' : None,
- 'charac' : (3, -3, None, None, 'CACGTG'),
- 'ovhgseq' : '',
+ 'compsite': '(?PCACGTG)',
+ 'results': None,
+ 'site': 'CACGTG',
+ 'substrat': 'DNA',
+ 'fst3': -3,
+ 'fst5': 3,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 0,
+ 'scd3': None,
+ 'suppl': ('Q', 'X'),
+ 'scd5': None,
+ 'charac': (3, -3, None, None, 'CACGTG'),
+ 'ovhgseq': '',
}
rest_dict['AcvI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PG[AG]CG[CT]C)|(?PG[AG]CG[CT]C)',
- 'results' : None,
- 'site' : 'GRCGYC',
- 'substrat' : 'DNA',
- 'fst3' : -2,
- 'fst5' : 2,
- 'freq' : 1024,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -2,
- 'scd3' : None,
- 'suppl' : ('J', 'M'),
- 'scd5' : None,
- 'charac' : (2, -2, None, None, 'GRCGYC'),
- 'ovhgseq' : 'CG',
+ 'compsite': '(?PG[AG]CG[CT]C)',
+ 'results': None,
+ 'site': 'GRCGYC',
+ 'substrat': 'DNA',
+ 'fst3': -2,
+ 'fst5': 2,
+ 'freq': 1024,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -2,
+ 'scd3': None,
+ 'suppl': ('J',),
+ 'scd5': None,
+ 'charac': (2, -2, None, None, 'GRCGYC'),
+ 'ovhgseq': 'CG',
}
rest_dict['AcyI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PCAC...GTG)|(?PCAC...GTG)',
- 'results' : None,
- 'site' : 'CACNNNGTG',
- 'substrat' : 'DNA',
- 'fst3' : -6,
- 'fst5' : 6,
- 'freq' : 4096,
- 'size' : 9,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 3,
- 'scd3' : None,
- 'suppl' : ('F',),
- 'scd5' : None,
- 'charac' : (6, -6, None, None, 'CACNNNGTG'),
- 'ovhgseq' : 'NNN',
+ 'compsite': '(?PCAC...GTG)',
+ 'results': None,
+ 'site': 'CACNNNGTG',
+ 'substrat': 'DNA',
+ 'fst3': -6,
+ 'fst5': 6,
+ 'freq': 4096,
+ 'size': 9,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 3,
+ 'scd3': None,
+ 'suppl': ('F',),
+ 'scd5': None,
+ 'charac': (6, -6, None, None, 'CACNNNGTG'),
+ 'ovhgseq': 'NNN',
}
rest_dict['AdeI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGTAC)|(?PGTAC)',
- 'results' : None,
- 'site' : 'GTAC',
- 'substrat' : 'DNA',
- 'fst3' : -2,
- 'fst5' : 2,
- 'freq' : 256,
- 'size' : 4,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 0,
- 'scd3' : None,
- 'suppl' : ('K',),
- 'scd5' : None,
- 'charac' : (2, -2, None, None, 'GTAC'),
- 'ovhgseq' : '',
+ 'compsite': '(?PGTAC)',
+ 'results': None,
+ 'site': 'GTAC',
+ 'substrat': 'DNA',
+ 'fst3': -2,
+ 'fst5': 2,
+ 'freq': 256,
+ 'size': 4,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 0,
+ 'scd3': None,
+ 'suppl': ('B', 'K'),
+ 'scd5': None,
+ 'charac': (2, -2, None, None, 'GTAC'),
+ 'ovhgseq': '',
}
rest_dict['AfaI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PAGCGCT)|(?PAGCGCT)',
- 'results' : None,
- 'site' : 'AGCGCT',
- 'substrat' : 'DNA',
- 'fst3' : -3,
- 'fst5' : 3,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 0,
- 'scd3' : None,
- 'suppl' : ('I', 'N'),
- 'scd5' : None,
- 'charac' : (3, -3, None, None, 'AGCGCT'),
- 'ovhgseq' : '',
+ 'compsite': '(?PAGCGCT)',
+ 'results': None,
+ 'site': 'AGCGCT',
+ 'substrat': 'DNA',
+ 'fst3': -3,
+ 'fst5': 3,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 0,
+ 'scd3': None,
+ 'suppl': ('I', 'N'),
+ 'scd5': None,
+ 'charac': (3, -3, None, None, 'AGCGCT'),
+ 'ovhgseq': '',
}
rest_dict['AfeI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PCC.......GG)|(?PCC.......GG)',
- 'results' : None,
- 'site' : 'CCNNNNNNNGG',
- 'substrat' : 'DNA',
- 'fst3' : -7,
- 'fst5' : 7,
- 'freq' : 256,
- 'size' : 11,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 3,
- 'scd3' : None,
- 'suppl' : ('V',),
- 'scd5' : None,
- 'charac' : (7, -7, None, None, 'CCNNNNNNNGG'),
- 'ovhgseq' : 'NNN',
+ 'compsite': '(?PCC.......GG)',
+ 'results': None,
+ 'site': 'CCNNNNNNNGG',
+ 'substrat': 'DNA',
+ 'fst3': -7,
+ 'fst5': 7,
+ 'freq': 256,
+ 'size': 11,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 3,
+ 'scd3': None,
+ 'suppl': ('V',),
+ 'scd5': None,
+ 'charac': (7, -7, None, None, 'CCNNNNNNNGG'),
+ 'ovhgseq': 'NNN',
}
rest_dict['AfiI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PCTTAAG)|(?PCTTAAG)',
- 'results' : None,
- 'site' : 'CTTAAG',
- 'substrat' : 'DNA',
- 'fst3' : -1,
- 'fst5' : 1,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : ('J', 'K', 'N'),
- 'scd5' : None,
- 'charac' : (1, -1, None, None, 'CTTAAG'),
- 'ovhgseq' : 'TTAA',
+ 'compsite': '(?PCTTAAG)',
+ 'results': None,
+ 'site': 'CTTAAG',
+ 'substrat': 'DNA',
+ 'fst3': -1,
+ 'fst5': 1,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': ('J', 'K', 'N'),
+ 'scd5': None,
+ 'charac': (1, -1, None, None, 'CTTAAG'),
+ 'ovhgseq': 'TTAA',
}
rest_dict['AflII'] = _temp()
def _temp():
return {
- 'compsite' : '(?PAC[AG][CT]GT)|(?PAC[AG][CT]GT)',
- 'results' : None,
- 'site' : 'ACRYGT',
- 'substrat' : 'DNA',
- 'fst3' : -1,
- 'fst5' : 1,
- 'freq' : 1024,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : ('M', 'N', 'W'),
- 'scd5' : None,
- 'charac' : (1, -1, None, None, 'ACRYGT'),
- 'ovhgseq' : 'CRYG',
+ 'compsite': '(?PAC[AG][CT]GT)',
+ 'results': None,
+ 'site': 'ACRYGT',
+ 'substrat': 'DNA',
+ 'fst3': -1,
+ 'fst5': 1,
+ 'freq': 1024,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': ('M', 'N'),
+ 'scd5': None,
+ 'charac': (1, -1, None, None, 'ACRYGT'),
+ 'ovhgseq': 'CRYG',
}
rest_dict['AflIII'] = _temp()
def _temp():
return {
- 'compsite' : '(?PACCGGT)|(?PACCGGT)',
- 'results' : None,
- 'site' : 'ACCGGT',
- 'substrat' : 'DNA',
- 'fst3' : -1,
- 'fst5' : 1,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : ('J', 'N', 'R'),
- 'scd5' : None,
- 'charac' : (1, -1, None, None, 'ACCGGT'),
- 'ovhgseq' : 'CCGG',
+ 'compsite': '(?PACCGGT)',
+ 'results': None,
+ 'site': 'ACCGGT',
+ 'substrat': 'DNA',
+ 'fst3': -1,
+ 'fst5': 1,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': ('J', 'N', 'R'),
+ 'scd5': None,
+ 'charac': (1, -1, None, None, 'ACCGGT'),
+ 'ovhgseq': 'CCGG',
}
rest_dict['AgeI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PTT[CG]AA)|(?PTT[CG]AA)',
- 'results' : None,
- 'site' : 'TTSAA',
- 'substrat' : 'DNA',
- 'fst3' : -3,
- 'fst5' : 3,
- 'freq' : 512,
- 'size' : 5,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 1,
- 'scd3' : None,
- 'suppl' : ('I',),
- 'scd5' : None,
- 'charac' : (3, -3, None, None, 'TTSAA'),
- 'ovhgseq' : 'S',
+ 'compsite': '(?PTT[CG]AA)',
+ 'results': None,
+ 'site': 'TTSAA',
+ 'substrat': 'DNA',
+ 'fst3': -3,
+ 'fst5': 3,
+ 'freq': 512,
+ 'size': 5,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 1,
+ 'scd3': None,
+ 'suppl': ('I',),
+ 'scd5': None,
+ 'charac': (3, -3, None, None, 'TTSAA'),
+ 'ovhgseq': 'S',
}
rest_dict['AgsI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PTTTAAA)|(?PTTTAAA)',
- 'results' : None,
- 'site' : 'TTTAAA',
- 'substrat' : 'DNA',
- 'fst3' : -3,
- 'fst5' : 3,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 0,
- 'scd3' : None,
- 'suppl' : (),
- 'scd5' : None,
- 'charac' : (3, -3, None, None, 'TTTAAA'),
- 'ovhgseq' : '',
+ 'compsite': '(?PTTTAAA)',
+ 'results': None,
+ 'site': 'TTTAAA',
+ 'substrat': 'DNA',
+ 'fst3': -3,
+ 'fst5': 3,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 0,
+ 'scd3': None,
+ 'suppl': (),
+ 'scd5': None,
+ 'charac': (3, -3, None, None, 'TTTAAA'),
+ 'ovhgseq': '',
}
rest_dict['AhaIII'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGAC.....GTC)|(?PGAC.....GTC)',
- 'results' : None,
- 'site' : 'GACNNNNNGTC',
- 'substrat' : 'DNA',
- 'fst3' : -6,
- 'fst5' : 6,
- 'freq' : 4096,
- 'size' : 11,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 1,
- 'scd3' : None,
- 'suppl' : ('N',),
- 'scd5' : None,
- 'charac' : (6, -6, None, None, 'GACNNNNNGTC'),
- 'ovhgseq' : 'N',
+ 'compsite': '(?PGAC.....GTC)',
+ 'results': None,
+ 'site': 'GACNNNNNGTC',
+ 'substrat': 'DNA',
+ 'fst3': -6,
+ 'fst5': 6,
+ 'freq': 4096,
+ 'size': 11,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 1,
+ 'scd3': None,
+ 'suppl': ('N',),
+ 'scd5': None,
+ 'charac': (6, -6, None, None, 'GACNNNNNGTC'),
+ 'ovhgseq': 'N',
}
rest_dict['AhdI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PACTAGT)|(?PACTAGT)',
- 'results' : None,
- 'site' : 'ACTAGT',
- 'substrat' : 'DNA',
- 'fst3' : -1,
- 'fst5' : 1,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : ('I', 'V'),
- 'scd5' : None,
- 'charac' : (1, -1, None, None, 'ACTAGT'),
- 'ovhgseq' : 'CTAG',
+ 'compsite': '(?PACTAGT)',
+ 'results': None,
+ 'site': 'ACTAGT',
+ 'substrat': 'DNA',
+ 'fst3': -1,
+ 'fst5': 1,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': ('I', 'V'),
+ 'scd5': None,
+ 'charac': (1, -1, None, None, 'ACTAGT'),
+ 'ovhgseq': 'CTAG',
}
rest_dict['AhlI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PCACGTC)|(?PGACGTG)',
- 'results' : None,
- 'site' : 'CACGTC',
- 'substrat' : 'DNA',
- 'fst3' : -3,
- 'fst5' : 3,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 0,
- 'scd3' : None,
- 'suppl' : ('F',),
- 'scd5' : None,
- 'charac' : (3, -3, None, None, 'CACGTC'),
- 'ovhgseq' : '',
+ 'compsite': '(?PCACGTC)|(?PGACGTG)',
+ 'results': None,
+ 'site': 'CACGTC',
+ 'substrat': 'DNA',
+ 'fst3': -3,
+ 'fst5': 3,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 0,
+ 'scd3': None,
+ 'suppl': ('F',),
+ 'scd5': None,
+ 'charac': (3, -3, None, None, 'CACGTC'),
+ 'ovhgseq': '',
}
rest_dict['AjiI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PCC[AT]GG)|(?PCC[AT]GG)',
- 'results' : None,
- 'site' : 'CCWGG',
- 'substrat' : 'DNA',
- 'fst3' : 0,
- 'fst5' : 0,
- 'freq' : 512,
- 'size' : 5,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -5,
- 'scd3' : None,
- 'suppl' : ('I',),
- 'scd5' : None,
- 'charac' : (0, 0, None, None, 'CCWGG'),
- 'ovhgseq' : 'CCWGG',
+ 'compsite': '(?PCC[AT]GG)',
+ 'results': None,
+ 'site': 'CCWGG',
+ 'substrat': 'DNA',
+ 'fst3': 0,
+ 'fst5': 0,
+ 'freq': 512,
+ 'size': 5,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -5,
+ 'scd3': None,
+ 'suppl': ('I',),
+ 'scd5': None,
+ 'charac': (0, 0, None, None, 'CCWGG'),
+ 'ovhgseq': 'CCWGG',
}
rest_dict['AjnI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGAA.......TTGG)|(?PCCAA.......TTC)',
- 'results' : None,
- 'site' : 'GAANNNNNNNTTGG',
- 'substrat' : 'DNA',
- 'fst3' : -26,
- 'fst5' : -7,
- 'freq' : 16384,
- 'size' : 14,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 5,
- 'scd3' : 6,
- 'suppl' : ('F',),
- 'scd5' : 25,
- 'charac' : (-7, -26, 25, 6, 'GAANNNNNNNTTGG'),
- 'ovhgseq' : 'NNNNN',
+ 'compsite': '(?PGAA.......TTGG)|(?PCCAA.......TTC)',
+ 'results': None,
+ 'site': 'GAANNNNNNNTTGG',
+ 'substrat': 'DNA',
+ 'fst3': -26,
+ 'fst5': -7,
+ 'freq': 16384,
+ 'size': 14,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 5,
+ 'scd3': 6,
+ 'suppl': ('F',),
+ 'scd5': 25,
+ 'charac': (-7, -26, 25, 6, 'GAANNNNNNNTTGG'),
+ 'ovhgseq': 'NNNNN',
}
rest_dict['AjuI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PCAC....GTG)|(?PCAC....GTG)',
- 'results' : None,
- 'site' : 'CACNNNNGTG',
- 'substrat' : 'DNA',
- 'fst3' : -5,
- 'fst5' : 5,
- 'freq' : 4096,
- 'size' : 10,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 0,
- 'scd3' : None,
- 'suppl' : ('N',),
- 'scd5' : None,
- 'charac' : (5, -5, None, None, 'CACNNNNGTG'),
- 'ovhgseq' : '',
+ 'compsite': '(?PCAC....GTG)',
+ 'results': None,
+ 'site': 'CACNNNNGTG',
+ 'substrat': 'DNA',
+ 'fst3': -5,
+ 'fst5': 5,
+ 'freq': 4096,
+ 'size': 10,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 0,
+ 'scd3': None,
+ 'suppl': ('N',),
+ 'scd5': None,
+ 'charac': (5, -5, None, None, 'CACNNNNGTG'),
+ 'ovhgseq': '',
}
rest_dict['AleI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGCA......TGC)|(?PGCA......TGC)',
- 'results' : None,
- 'site' : 'GCANNNNNNTGC',
- 'substrat' : 'DNA',
- 'fst3' : -24,
- 'fst5' : -10,
- 'freq' : 4096,
- 'size' : 12,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 2,
- 'scd3' : 10,
- 'suppl' : ('F',),
- 'scd5' : 24,
- 'charac' : (-10, -24, 24, 10, 'GCANNNNNNTGC'),
- 'ovhgseq' : 'NN',
+ 'compsite': '(?PGCA......TGC)',
+ 'results': None,
+ 'site': 'GCANNNNNNTGC',
+ 'substrat': 'DNA',
+ 'fst3': -24,
+ 'fst5': -10,
+ 'freq': 4096,
+ 'size': 12,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 2,
+ 'scd3': 10,
+ 'suppl': ('F',),
+ 'scd5': 24,
+ 'charac': (-10, -24, 24, 10, 'GCANNNNNNTGC'),
+ 'ovhgseq': 'NN',
}
rest_dict['AlfI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGAAC......TCC)|(?PGGA......GTTC)',
- 'results' : None,
- 'site' : 'GAACNNNNNNTCC',
- 'substrat' : 'DNA',
- 'fst3' : -25,
- 'fst5' : -7,
- 'freq' : 16384,
- 'size' : 13,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 5,
- 'scd3' : 7,
- 'suppl' : ('F',),
- 'scd5' : 25,
- 'charac' : (-7, -25, 25, 7, 'GAACNNNNNNTCC'),
- 'ovhgseq' : 'NNNNN',
+ 'compsite': '(?PGAAC......TCC)|(?PGGA......GTTC)',
+ 'results': None,
+ 'site': 'GAACNNNNNNTCC',
+ 'substrat': 'DNA',
+ 'fst3': -25,
+ 'fst5': -7,
+ 'freq': 16384,
+ 'size': 13,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 5,
+ 'scd3': 7,
+ 'suppl': ('F',),
+ 'scd5': 25,
+ 'charac': (-7, -25, 25, 7, 'GAACNNNNNNTCC'),
+ 'ovhgseq': 'NNNNN',
}
rest_dict['AloI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PAGCT)|(?PAGCT)',
- 'results' : None,
- 'site' : 'AGCT',
- 'substrat' : 'DNA',
- 'fst3' : -2,
- 'fst5' : 2,
- 'freq' : 256,
- 'size' : 4,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 0,
- 'scd3' : None,
- 'suppl' : ('I',),
- 'scd5' : None,
- 'charac' : (2, -2, None, None, 'AGCT'),
- 'ovhgseq' : '',
+ 'compsite': '(?PAGCT)',
+ 'results': None,
+ 'site': 'AGCT',
+ 'substrat': 'DNA',
+ 'fst3': -2,
+ 'fst5': 2,
+ 'freq': 256,
+ 'size': 4,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 0,
+ 'scd3': None,
+ 'suppl': ('I',),
+ 'scd5': None,
+ 'charac': (2, -2, None, None, 'AGCT'),
+ 'ovhgseq': '',
}
rest_dict['AluBI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PAGCT)|(?PAGCT)',
- 'results' : None,
- 'site' : 'AGCT',
- 'substrat' : 'DNA',
- 'fst3' : -2,
- 'fst5' : 2,
- 'freq' : 256,
- 'size' : 4,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 0,
- 'scd3' : None,
- 'suppl' : ('B', 'C', 'F', 'H', 'I', 'J', 'K', 'M', 'N', 'O', 'Q', 'R', 'S', 'U', 'V', 'W', 'X', 'Y'),
- 'scd5' : None,
- 'charac' : (2, -2, None, None, 'AGCT'),
- 'ovhgseq' : '',
+ 'compsite': '(?PAGCT)',
+ 'results': None,
+ 'site': 'AGCT',
+ 'substrat': 'DNA',
+ 'fst3': -2,
+ 'fst5': 2,
+ 'freq': 256,
+ 'size': 4,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 0,
+ 'scd3': None,
+ 'suppl': ('B', 'C', 'F', 'I', 'J', 'K', 'M', 'N', 'O', 'Q', 'R', 'S', 'U', 'V', 'X', 'Y'),
+ 'scd5': None,
+ 'charac': (2, -2, None, None, 'AGCT'),
+ 'ovhgseq': '',
}
rest_dict['AluI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PG[AT]GC[AT]C)|(?PG[AT]GC[AT]C)',
- 'results' : None,
- 'site' : 'GWGCWC',
- 'substrat' : 'DNA',
- 'fst3' : -5,
- 'fst5' : 5,
- 'freq' : 1024,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 4,
- 'scd3' : None,
- 'suppl' : ('F',),
- 'scd5' : None,
- 'charac' : (5, -5, None, None, 'GWGCWC'),
- 'ovhgseq' : 'WGCW',
+ 'compsite': '(?PG[AT]GC[AT]C)',
+ 'results': None,
+ 'site': 'GWGCWC',
+ 'substrat': 'DNA',
+ 'fst3': -5,
+ 'fst5': 5,
+ 'freq': 1024,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 4,
+ 'scd3': None,
+ 'suppl': ('F',),
+ 'scd5': None,
+ 'charac': (5, -5, None, None, 'GWGCWC'),
+ 'ovhgseq': 'WGCW',
}
rest_dict['Alw21I'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGTCTC)|(?PGAGAC)',
- 'results' : None,
- 'site' : 'GTCTC',
- 'substrat' : 'DNA',
- 'fst3' : 5,
- 'fst5' : 6,
- 'freq' : 1024,
- 'size' : 5,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : ('F',),
- 'scd5' : None,
- 'charac' : (6, 5, None, None, 'GTCTC'),
- 'ovhgseq' : 'NNNN',
+ 'compsite': '(?PGTCTC)|(?PGAGAC)',
+ 'results': None,
+ 'site': 'GTCTC',
+ 'substrat': 'DNA',
+ 'fst3': 5,
+ 'fst5': 6,
+ 'freq': 1024,
+ 'size': 5,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': ('F',),
+ 'scd5': None,
+ 'charac': (6, 5, None, None, 'GTCTC'),
+ 'ovhgseq': 'NNNN',
}
rest_dict['Alw26I'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGTGCAC)|(?PGTGCAC)',
- 'results' : None,
- 'site' : 'GTGCAC',
- 'substrat' : 'DNA',
- 'fst3' : -1,
- 'fst5' : 1,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : ('F', 'J', 'O', 'R'),
- 'scd5' : None,
- 'charac' : (1, -1, None, None, 'GTGCAC'),
- 'ovhgseq' : 'TGCA',
+ 'compsite': '(?PGTGCAC)',
+ 'results': None,
+ 'site': 'GTGCAC',
+ 'substrat': 'DNA',
+ 'fst3': -1,
+ 'fst5': 1,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': ('F', 'J'),
+ 'scd5': None,
+ 'charac': (1, -1, None, None, 'GTGCAC'),
+ 'ovhgseq': 'TGCA',
}
rest_dict['Alw44I'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGAAA[CT].....[AG]TG)|(?PCA[CT].....[AG]TTTC)',
- 'results' : None,
- 'site' : 'GAAAYNNNNNRTG',
- 'substrat' : 'DNA',
- 'fst3' : None,
- 'fst5' : None,
- 'freq' : 16384,
- 'size' : 13,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : None,
- 'scd3' : None,
- 'suppl' : (),
- 'scd5' : None,
- 'charac' : (None, None, None, None, 'GAAAYNNNNNRTG'),
- 'ovhgseq' : None,
+ 'compsite': '(?PGAAA[CT].....[AG]TG)|(?PCA[CT].....[AG]TTTC)',
+ 'results': None,
+ 'site': 'GAAAYNNNNNRTG',
+ 'substrat': 'DNA',
+ 'fst3': None,
+ 'fst5': None,
+ 'freq': 16384,
+ 'size': 13,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': None,
+ 'scd3': None,
+ 'suppl': (),
+ 'scd5': None,
+ 'charac': (None, None, None, None, 'GAAAYNNNNNRTG'),
+ 'ovhgseq': None,
}
rest_dict['AlwFI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGGATC)|(?PGATCC)',
- 'results' : None,
- 'site' : 'GGATC',
- 'substrat' : 'DNA',
- 'fst3' : 5,
- 'fst5' : 9,
- 'freq' : 1024,
- 'size' : 5,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -1,
- 'scd3' : None,
- 'suppl' : ('N',),
- 'scd5' : None,
- 'charac' : (9, 5, None, None, 'GGATC'),
- 'ovhgseq' : 'N',
+ 'compsite': '(?PGGATC)|(?PGATCC)',
+ 'results': None,
+ 'site': 'GGATC',
+ 'substrat': 'DNA',
+ 'fst3': 5,
+ 'fst5': 9,
+ 'freq': 1024,
+ 'size': 5,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -1,
+ 'scd3': None,
+ 'suppl': ('N',),
+ 'scd5': None,
+ 'charac': (9, 5, None, None, 'GGATC'),
+ 'ovhgseq': 'N',
}
rest_dict['AlwI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PCAG...CTG)|(?PCAG...CTG)',
- 'results' : None,
- 'site' : 'CAGNNNCTG',
- 'substrat' : 'DNA',
- 'fst3' : -6,
- 'fst5' : 6,
- 'freq' : 4096,
- 'size' : 9,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 3,
- 'scd3' : None,
- 'suppl' : ('N',),
- 'scd5' : None,
- 'charac' : (6, -6, None, None, 'CAGNNNCTG'),
- 'ovhgseq' : 'NNN',
+ 'compsite': '(?PCAG...CTG)',
+ 'results': None,
+ 'site': 'CAGNNNCTG',
+ 'substrat': 'DNA',
+ 'fst3': -6,
+ 'fst5': 6,
+ 'freq': 4096,
+ 'size': 9,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 3,
+ 'scd3': None,
+ 'suppl': ('N',),
+ 'scd5': None,
+ 'charac': (6, -6, None, None, 'CAGNNNCTG'),
+ 'ovhgseq': 'NNN',
}
rest_dict['AlwNI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PC[CT]CG[AG]G)|(?PC[CT]CG[AG]G)',
- 'results' : None,
- 'site' : 'CYCGRG',
- 'substrat' : 'DNA',
- 'fst3' : -1,
- 'fst5' : 1,
- 'freq' : 1024,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : ('I', 'V'),
- 'scd5' : None,
- 'charac' : (1, -1, None, None, 'CYCGRG'),
- 'ovhgseq' : 'YCGR',
+ 'compsite': '(?PC[CT]CG[AG]G)',
+ 'results': None,
+ 'site': 'CYCGRG',
+ 'substrat': 'DNA',
+ 'fst3': -1,
+ 'fst5': 1,
+ 'freq': 1024,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': ('I', 'V'),
+ 'scd5': None,
+ 'charac': (1, -1, None, None, 'CYCGRG'),
+ 'ovhgseq': 'YCGR',
}
rest_dict['Ama87I'] = _temp()
def _temp():
return {
- 'compsite' : '(?PTCCGGA)|(?PTCCGGA)',
- 'results' : None,
- 'site' : 'TCCGGA',
- 'substrat' : 'DNA',
- 'fst3' : -1,
- 'fst5' : 1,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : ('K',),
- 'scd5' : None,
- 'charac' : (1, -1, None, None, 'TCCGGA'),
- 'ovhgseq' : 'CCGG',
+ 'compsite': '(?PTCCGGA)',
+ 'results': None,
+ 'site': 'TCCGGA',
+ 'substrat': 'DNA',
+ 'fst3': -1,
+ 'fst5': 1,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': ('K',),
+ 'scd5': None,
+ 'charac': (1, -1, None, None, 'TCCGGA'),
+ 'ovhgseq': 'CCGG',
}
rest_dict['Aor13HI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PAGCGCT)|(?PAGCGCT)',
- 'results' : None,
- 'site' : 'AGCGCT',
- 'substrat' : 'DNA',
- 'fst3' : -3,
- 'fst5' : 3,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 0,
- 'scd3' : None,
- 'suppl' : ('K',),
- 'scd5' : None,
- 'charac' : (3, -3, None, None, 'AGCGCT'),
- 'ovhgseq' : '',
+ 'compsite': '(?PAGCGCT)',
+ 'results': None,
+ 'site': 'AGCGCT',
+ 'substrat': 'DNA',
+ 'fst3': -3,
+ 'fst5': 3,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 0,
+ 'scd3': None,
+ 'suppl': ('K',),
+ 'scd5': None,
+ 'charac': (3, -3, None, None, 'AGCGCT'),
+ 'ovhgseq': '',
}
rest_dict['Aor51HI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGCA.....TGC)|(?PGCA.....TGC)',
- 'results' : None,
- 'site' : 'GCANNNNNTGC',
- 'substrat' : 'DNA',
- 'fst3' : -8,
- 'fst5' : 8,
- 'freq' : 4096,
- 'size' : 11,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 5,
- 'scd3' : None,
- 'suppl' : (),
- 'scd5' : None,
- 'charac' : (8, -8, None, None, 'GCANNNNNTGC'),
- 'ovhgseq' : 'NNNNN',
+ 'compsite': '(?PGGCC)',
+ 'results': None,
+ 'site': 'GGCC',
+ 'substrat': 'DNA',
+ 'fst3': 0,
+ 'fst5': 0,
+ 'freq': 256,
+ 'size': 4,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': (),
+ 'scd5': None,
+ 'charac': (0, 0, None, None, 'GGCC'),
+ 'ovhgseq': 'GGCC',
+ }
+rest_dict['AoxI'] = _temp()
+
+def _temp():
+ return {
+ 'compsite': '(?PGCA.....TGC)',
+ 'results': None,
+ 'site': 'GCANNNNNTGC',
+ 'substrat': 'DNA',
+ 'fst3': -8,
+ 'fst5': 8,
+ 'freq': 4096,
+ 'size': 11,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 5,
+ 'scd3': None,
+ 'suppl': (),
+ 'scd5': None,
+ 'charac': (8, -8, None, None, 'GCANNNNNTGC'),
+ 'ovhgseq': 'NNNNN',
}
rest_dict['ApaBI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGGGCCC)|(?PGGGCCC)',
- 'results' : None,
- 'site' : 'GGGCCC',
- 'substrat' : 'DNA',
- 'fst3' : -5,
- 'fst5' : 5,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 4,
- 'scd3' : None,
- 'suppl' : ('B', 'F', 'I', 'J', 'K', 'M', 'N', 'O', 'Q', 'R', 'S', 'U', 'V', 'W', 'X'),
- 'scd5' : None,
- 'charac' : (5, -5, None, None, 'GGGCCC'),
- 'ovhgseq' : 'GGCC',
+ 'compsite': '(?PGGGCCC)',
+ 'results': None,
+ 'site': 'GGGCCC',
+ 'substrat': 'DNA',
+ 'fst3': -5,
+ 'fst5': 5,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 4,
+ 'scd3': None,
+ 'suppl': ('B', 'F', 'I', 'J', 'K', 'M', 'N', 'Q', 'R', 'S', 'U', 'V', 'X'),
+ 'scd5': None,
+ 'charac': (5, -5, None, None, 'GGGCCC'),
+ 'ovhgseq': 'GGCC',
}
rest_dict['ApaI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGTGCAC)|(?PGTGCAC)',
- 'results' : None,
- 'site' : 'GTGCAC',
- 'substrat' : 'DNA',
- 'fst3' : -1,
- 'fst5' : 1,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : ('C', 'K', 'N', 'U'),
- 'scd5' : None,
- 'charac' : (1, -1, None, None, 'GTGCAC'),
- 'ovhgseq' : 'TGCA',
+ 'compsite': '(?PGTGCAC)',
+ 'results': None,
+ 'site': 'GTGCAC',
+ 'substrat': 'DNA',
+ 'fst3': -1,
+ 'fst5': 1,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': ('C', 'K', 'N', 'U'),
+ 'scd5': None,
+ 'charac': (1, -1, None, None, 'GTGCAC'),
+ 'ovhgseq': 'TGCA',
}
rest_dict['ApaLI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGC[AT]GC)|(?PGC[AT]GC)',
- 'results' : None,
- 'site' : 'GCWGC',
- 'substrat' : 'DNA',
- 'fst3' : -1,
- 'fst5' : 1,
- 'freq' : 512,
- 'size' : 5,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -3,
- 'scd3' : None,
- 'suppl' : ('N',),
- 'scd5' : None,
- 'charac' : (1, -1, None, None, 'GCWGC'),
- 'ovhgseq' : 'CWG',
+ 'compsite': '(?PGC[AT]GC)',
+ 'results': None,
+ 'site': 'GCWGC',
+ 'substrat': 'DNA',
+ 'fst3': -1,
+ 'fst5': 1,
+ 'freq': 512,
+ 'size': 5,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -3,
+ 'scd3': None,
+ 'suppl': ('N',),
+ 'scd5': None,
+ 'charac': (1, -1, None, None, 'GCWGC'),
+ 'ovhgseq': 'CWG',
}
rest_dict['ApeKI'] = _temp()
def _temp():
return {
- 'compsite' : '(?P[AG]AATT[CT])|(?P[AG]AATT[CT])',
- 'results' : None,
- 'site' : 'RAATTY',
- 'substrat' : 'DNA',
- 'fst3' : -1,
- 'fst5' : 1,
- 'freq' : 1024,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : ('N',),
- 'scd5' : None,
- 'charac' : (1, -1, None, None, 'RAATTY'),
- 'ovhgseq' : 'AATT',
+ 'compsite': '(?P[AG]AATT[CT])',
+ 'results': None,
+ 'site': 'RAATTY',
+ 'substrat': 'DNA',
+ 'fst3': -1,
+ 'fst5': 1,
+ 'freq': 1024,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': ('N',),
+ 'scd5': None,
+ 'charac': (1, -1, None, None, 'RAATTY'),
+ 'ovhgseq': 'AATT',
}
rest_dict['ApoI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PATCGAC)|(?PGTCGAT)',
- 'results' : None,
- 'site' : 'ATCGAC',
- 'substrat' : 'DNA',
- 'fst3' : 18,
- 'fst5' : 26,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 2,
- 'scd3' : None,
- 'suppl' : (),
- 'scd5' : None,
- 'charac' : (26, 18, None, None, 'ATCGAC'),
- 'ovhgseq' : 'NN',
+ 'compsite': '(?PATCGAC)|(?PGTCGAT)',
+ 'results': None,
+ 'site': 'ATCGAC',
+ 'substrat': 'DNA',
+ 'fst3': 18,
+ 'fst5': 26,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 2,
+ 'scd3': None,
+ 'suppl': (),
+ 'scd5': None,
+ 'charac': (26, 18, None, None, 'ATCGAC'),
+ 'ovhgseq': 'NN',
}
rest_dict['ApyPI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGCCG.AC)|(?PGT.CGGC)',
- 'results' : None,
- 'site' : 'GCCGNAC',
- 'substrat' : 'DNA',
- 'fst3' : 18,
- 'fst5' : 27,
- 'freq' : 4096,
- 'size' : 7,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 2,
- 'scd3' : None,
- 'suppl' : (),
- 'scd5' : None,
- 'charac' : (27, 18, None, None, 'GCCGNAC'),
- 'ovhgseq' : 'NN',
+ 'compsite': '(?PGCCG.AC)|(?PGT.CGGC)',
+ 'results': None,
+ 'site': 'GCCGNAC',
+ 'substrat': 'DNA',
+ 'fst3': 18,
+ 'fst5': 27,
+ 'freq': 4096,
+ 'size': 7,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 2,
+ 'scd3': None,
+ 'suppl': (),
+ 'scd5': None,
+ 'charac': (27, 18, None, None, 'GCCGNAC'),
+ 'ovhgseq': 'NN',
}
rest_dict['AquII'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGAGGAG)|(?PCTCCTC)',
- 'results' : None,
- 'site' : 'GAGGAG',
- 'substrat' : 'DNA',
- 'fst3' : 18,
- 'fst5' : 26,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 2,
- 'scd3' : None,
- 'suppl' : (),
- 'scd5' : None,
- 'charac' : (26, 18, None, None, 'GAGGAG'),
- 'ovhgseq' : 'NN',
+ 'compsite': '(?PGAGGAG)|(?PCTCCTC)',
+ 'results': None,
+ 'site': 'GAGGAG',
+ 'substrat': 'DNA',
+ 'fst3': 18,
+ 'fst5': 26,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 2,
+ 'scd3': None,
+ 'suppl': (),
+ 'scd5': None,
+ 'charac': (26, 18, None, None, 'GAGGAG'),
+ 'ovhgseq': 'NN',
}
rest_dict['AquIII'] = _temp()
def _temp():
return {
- 'compsite' : '(?PG[AG]GGAAG)|(?PCTTCC[CT]C)',
- 'results' : None,
- 'site' : 'GRGGAAG',
- 'substrat' : 'DNA',
- 'fst3' : 17,
- 'fst5' : 26,
- 'freq' : 8192,
- 'size' : 7,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 2,
- 'scd3' : None,
- 'suppl' : (),
- 'scd5' : None,
- 'charac' : (26, 17, None, None, 'GRGGAAG'),
- 'ovhgseq' : 'NN',
+ 'compsite': '(?PG[AG]GGAAG)|(?PCTTCC[CT]C)',
+ 'results': None,
+ 'site': 'GRGGAAG',
+ 'substrat': 'DNA',
+ 'fst3': 17,
+ 'fst5': 26,
+ 'freq': 8192,
+ 'size': 7,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 2,
+ 'scd3': None,
+ 'suppl': (),
+ 'scd5': None,
+ 'charac': (26, 17, None, None, 'GRGGAAG'),
+ 'ovhgseq': 'NN',
}
rest_dict['AquIV'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGAC......TT[CT]G)|(?PC[AG]AA......GTC)',
- 'results' : None,
- 'site' : 'GACNNNNNNTTYG',
- 'substrat' : 'DNA',
- 'fst3' : -26,
- 'fst5' : -8,
- 'freq' : 8192,
- 'size' : 13,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 5,
- 'scd3' : 6,
- 'suppl' : ('I',),
- 'scd5' : 24,
- 'charac' : (-8, -26, 24, 6, 'GACNNNNNNTTYG'),
- 'ovhgseq' : 'NNNNN',
+ 'compsite': '(?PGAC......TT[CT]G)|(?PC[AG]AA......GTC)',
+ 'results': None,
+ 'site': 'GACNNNNNNTTYG',
+ 'substrat': 'DNA',
+ 'fst3': -26,
+ 'fst5': -8,
+ 'freq': 8192,
+ 'size': 13,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 5,
+ 'scd3': 6,
+ 'suppl': ('I',),
+ 'scd5': 24,
+ 'charac': (-8, -26, 24, 6, 'GACNNNNNNTTYG'),
+ 'ovhgseq': 'NNNNN',
}
rest_dict['ArsI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGGCGCGCC)|(?PGGCGCGCC)',
- 'results' : None,
- 'site' : 'GGCGCGCC',
- 'substrat' : 'DNA',
- 'fst3' : -2,
- 'fst5' : 2,
- 'freq' : 65536,
- 'size' : 8,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : ('N', 'W'),
- 'scd5' : None,
- 'charac' : (2, -2, None, None, 'GGCGCGCC'),
- 'ovhgseq' : 'CGCG',
+ 'compsite': '(?PGGCGCGCC)',
+ 'results': None,
+ 'site': 'GGCGCGCC',
+ 'substrat': 'DNA',
+ 'fst3': -2,
+ 'fst5': 2,
+ 'freq': 65536,
+ 'size': 8,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': ('N',),
+ 'scd5': None,
+ 'charac': (2, -2, None, None, 'GGCGCGCC'),
+ 'ovhgseq': 'CGCG',
}
rest_dict['AscI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PATTAAT)|(?PATTAAT)',
- 'results' : None,
- 'site' : 'ATTAAT',
- 'substrat' : 'DNA',
- 'fst3' : -2,
- 'fst5' : 2,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -2,
- 'scd3' : None,
- 'suppl' : ('J', 'N', 'O'),
- 'scd5' : None,
- 'charac' : (2, -2, None, None, 'ATTAAT'),
- 'ovhgseq' : 'TA',
+ 'compsite': '(?PATTAAT)',
+ 'results': None,
+ 'site': 'ATTAAT',
+ 'substrat': 'DNA',
+ 'fst3': -2,
+ 'fst5': 2,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -2,
+ 'scd3': None,
+ 'suppl': ('J', 'N', 'O'),
+ 'scd5': None,
+ 'charac': (2, -2, None, None, 'ATTAAT'),
+ 'ovhgseq': 'TA',
}
rest_dict['AseI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGATC)|(?PGATC)',
- 'results' : None,
- 'site' : 'GATC',
- 'substrat' : 'DNA',
- 'fst3' : -1,
- 'fst5' : 1,
- 'freq' : 256,
- 'size' : 4,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -2,
- 'scd3' : None,
- 'suppl' : (),
- 'scd5' : None,
- 'charac' : (1, -1, None, None, 'GATC'),
- 'ovhgseq' : 'AT',
+ 'compsite': '(?PGATC)',
+ 'results': None,
+ 'site': 'GATC',
+ 'substrat': 'DNA',
+ 'fst3': -1,
+ 'fst5': 1,
+ 'freq': 256,
+ 'size': 4,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -2,
+ 'scd3': None,
+ 'suppl': (),
+ 'scd5': None,
+ 'charac': (1, -1, None, None, 'GATC'),
+ 'ovhgseq': 'AT',
}
rest_dict['Asi256I'] = _temp()
def _temp():
return {
- 'compsite' : '(?PACCGGT)|(?PACCGGT)',
- 'results' : None,
- 'site' : 'ACCGGT',
- 'substrat' : 'DNA',
- 'fst3' : -1,
- 'fst5' : 1,
- 'freq' : 4096,
- 'size' : 6,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : -4,
- 'scd3' : None,
- 'suppl' : ('I', 'V'),
- 'scd5' : None,
- 'charac' : (1, -1, None, None, 'ACCGGT'),
- 'ovhgseq' : 'CCGG',
+ 'compsite': '(?PACCGGT)',
+ 'results': None,
+ 'site': 'ACCGGT',
+ 'substrat': 'DNA',
+ 'fst3': -1,
+ 'fst5': 1,
+ 'freq': 4096,
+ 'size': 6,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': -4,
+ 'scd3': None,
+ 'suppl': ('I', 'V'),
+ 'scd5': None,
+ 'charac': (1, -1, None, None, 'ACCGGT'),
+ 'ovhgseq': 'CCGG',
}
rest_dict['AsiGI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGCGATCGC)|(?PGCGATCGC)',
- 'results' : None,
- 'site' : 'GCGATCGC',
- 'substrat' : 'DNA',
- 'fst3' : -5,
- 'fst5' : 5,
- 'freq' : 65536,
- 'size' : 8,
- 'opt_temp' : 37,
- 'dna' : None,
- 'inact_temp' : 65,
- 'ovhg' : 2,
- 'scd3' : None,
- 'suppl' : ('N',),
- 'scd5' : None,
- 'charac' : (5, -5, None, None, 'GCGATCGC'),
- 'ovhgseq' : 'AT',
+ 'compsite': '(?PGCGATCGC)',
+ 'results': None,
+ 'site': 'GCGATCGC',
+ 'substrat': 'DNA',
+ 'fst3': -5,
+ 'fst5': 5,
+ 'freq': 65536,
+ 'size': 8,
+ 'opt_temp': 37,
+ 'dna': None,
+ 'inact_temp': 65,
+ 'ovhg': 2,
+ 'scd3': None,
+ 'suppl': ('I', 'N'),
+ 'scd5': None,
+ 'charac': (5, -5, None, None, 'GCGATCGC'),
+ 'ovhgseq': 'AT',
}
rest_dict['AsiSI'] = _temp()
def _temp():
return {
- 'compsite' : '(?PGAA....TTC)|(?P