mirror of
https://gitee.com/coder-xiaomo/leetcode-problemset
synced 2025-09-05 23:41:41 +08:00
87 lines
17 KiB
JSON
87 lines
17 KiB
JSON
{
|
||
"data": {
|
||
"question": {
|
||
"questionId": "3816",
|
||
"questionFrontendId": "3475",
|
||
"categoryTitle": "Database",
|
||
"boundTopicId": 3593036,
|
||
"title": "DNA Pattern Recognition",
|
||
"titleSlug": "dna-pattern-recognition",
|
||
"content": "<p>Table: <code>Samples</code></p>\n\n<pre>\n+----------------+---------+\n| Column Name | Type | \n+----------------+---------+\n| sample_id | int |\n| dna_sequence | varchar |\n| species | varchar |\n+----------------+---------+\nsample_id is the unique key for this table.\nEach row contains a DNA sequence represented as a string of characters (A, T, G, C) and the species it was collected from.\n</pre>\n\n<p>Biologists are studying basic patterns in DNA sequences. Write a solution to identify <code>sample_id</code> with the following patterns:</p>\n\n<ul>\n\t<li>Sequences that <strong>start</strong> with <strong>ATG</strong> (a common <strong>start codon</strong>)</li>\n\t<li>Sequences that <strong>end</strong> with either <strong>TAA</strong>, <strong>TAG</strong>, or <strong>TGA</strong> (<strong>stop codons</strong>)</li>\n\t<li>Sequences containing the motif <strong>ATAT</strong> (a simple repeated pattern)</li>\n\t<li>Sequences that have <strong>at least</strong> <code>3</code> <strong>consecutive</strong> <strong>G</strong> (like <strong>GGG</strong> or <strong>GGGG</strong>)</li>\n</ul>\n\n<p>Return <em>the result table ordered by </em><em>sample_id in <strong>ascending</strong> order</em>.</p>\n\n<p>The result format is in the following example.</p>\n\n<p> </p>\n<p><strong class=\"example\">Example:</strong></p>\n\n<div class=\"example-block\">\n<p><strong>Input:</strong></p>\n\n<p>Samples table:</p>\n\n<pre class=\"example-io\">\n+-----------+------------------+-----------+\n| sample_id | dna_sequence | species |\n+-----------+------------------+-----------+\n| 1 | ATGCTAGCTAGCTAA | Human |\n| 2 | GGGTCAATCATC | Human |\n| 3 | ATATATCGTAGCTA | Human |\n| 4 | ATGGGGTCATCATAA | Mouse |\n| 5 | TCAGTCAGTCAG | Mouse |\n| 6 | ATATCGCGCTAG | Zebrafish |\n| 7 | CGTATGCGTCGTA | Zebrafish |\n+-----------+------------------+-----------+\n</pre>\n\n<p><strong>Output:</strong></p>\n\n<pre 
class=\"example-io\">\n+-----------+------------------+-------------+-------------+------------+------------+------------+\n| sample_id | dna_sequence | species | has_start | has_stop | has_atat | has_ggg |\n+-----------+------------------+-------------+-------------+------------+------------+------------+\n| 1 | ATGCTAGCTAGCTAA | Human | 1 | 1 | 0 | 0 |\n| 2 | GGGTCAATCATC | Human | 0 | 0 | 0 | 1 |\n| 3 | ATATATCGTAGCTA | Human | 0 | 0 | 1 | 0 |\n| 4 | ATGGGGTCATCATAA | Mouse | 1 | 1 | 0 | 1 |\n| 5 | TCAGTCAGTCAG | Mouse | 0 | 0 | 0 | 0 |\n| 6 | ATATCGCGCTAG | Zebrafish | 0 | 1 | 1 | 0 |\n| 7 | CGTATGCGTCGTA | Zebrafish | 0 | 0 | 0 | 0 |\n+-----------+------------------+-------------+-------------+------------+------------+------------+\n</pre>\n\n<p><strong>Explanation:</strong></p>\n\n<ul>\n\t<li>Sample 1 (ATGCTAGCTAGCTAA):\n\t<ul>\n\t\t<li>Starts with ATG (has_start = 1)</li>\n\t\t<li>Ends with TAA (has_stop = 1)</li>\n\t\t<li>Does not contain ATAT (has_atat = 0)</li>\n\t\t<li>Does not contain at least 3 consecutive 'G's (has_ggg = 0)</li>\n\t</ul>\n\t</li>\n\t<li>Sample 2 (GGGTCAATCATC):\n\t<ul>\n\t\t<li>Does not start with ATG (has_start = 0)</li>\n\t\t<li>Does not end with TAA, TAG, or TGA (has_stop = 0)</li>\n\t\t<li>Does not contain ATAT (has_atat = 0)</li>\n\t\t<li>Contains GGG (has_ggg = 1)</li>\n\t</ul>\n\t</li>\n\t<li>Sample 3 (ATATATCGTAGCTA):\n\t<ul>\n\t\t<li>Does not start with ATG (has_start = 0)</li>\n\t\t<li>Does not end with TAA, TAG, or TGA (has_stop = 0)</li>\n\t\t<li>Contains ATAT (has_atat = 1)</li>\n\t\t<li>Does not contain at least 3 consecutive 'G's (has_ggg = 0)</li>\n\t</ul>\n\t</li>\n\t<li>Sample 4 (ATGGGGTCATCATAA):\n\t<ul>\n\t\t<li>Starts with ATG (has_start = 1)</li>\n\t\t<li>Ends with TAA (has_stop = 1)</li>\n\t\t<li>Does not contain ATAT (has_atat = 0)</li>\n\t\t<li>Contains GGGG (has_ggg = 1)</li>\n\t</ul>\n\t</li>\n\t<li>Sample 5 (TCAGTCAGTCAG):\n\t<ul>\n\t\t<li>Does not match any patterns (all fields = 
0)</li>\n\t</ul>\n\t</li>\n\t<li>Sample 6 (ATATCGCGCTAG):\n\t<ul>\n\t\t<li>Does not start with ATG (has_start = 0)</li>\n\t\t<li>Ends with TAG (has_stop = 1)</li>\n\t\t<li>Contains ATAT (has_atat = 1)</li>\n\t\t<li>Does not contain at least 3 consecutive 'G's (has_ggg = 0)</li>\n\t</ul>\n\t</li>\n\t<li>Sample 7 (CGTATGCGTCGTA):\n\t<ul>\n\t\t<li>Does not start with ATG (has_start = 0)</li>\n\t\t<li>Does not end with TAA, TAG, or TGA (has_stop = 0)</li>\n\t\t<li>Does not contain ATAT (has_atat = 0)</li>\n\t\t<li>Does not contain at least 3 consecutive 'G's (has_ggg = 0)</li>\n\t</ul>\n\t</li>\n</ul>\n\n<p><strong>Note:</strong></p>\n\n<ul>\n\t<li>The result is ordered by sample_id in ascending order</li>\n\t<li>For each pattern, 1 indicates the pattern is present and 0 indicates it is not present</li>\n</ul>\n</div>\n",
|
||
"translatedTitle": "DNA 模式识别",
|
||
"translatedContent": "<p>表:<code>Samples</code></p>\n\n<pre>\n+----------------+---------+\n| Column Name | Type | \n+----------------+---------+\n| sample_id | int |\n| dna_sequence | varchar |\n| species | varchar |\n+----------------+---------+\nsample_id 是这张表的唯一主键。\n每一行包含一个 DNA 序列以一个字符(A,T,G,C)组成的字符串表示以及它所采集自的物种。\n</pre>\n\n<p>生物学家正在研究 DNA 序列中的基本模式。编写一个解决方案以识别具有以下模式的 <code>sample_id</code>:</p>\n\n<ul>\n\t<li>以 <strong>ATG</strong> <strong>开头</strong> 的序列(一个常见的 <strong>起始密码子</strong>)</li>\n\t<li>以 <strong>TAA</strong>,<strong>TAG</strong> 或 <strong>TGA</strong> <strong>结尾</strong> 的序列(终止密码子)</li>\n\t<li>包含基序 <strong>ATAT</strong> 的序列(一个简单重复模式)</li>\n\t<li>有 <strong>至少</strong> <code>3</code> <strong>个连续</strong> <strong>G</strong> 的序列(如 <strong>GGG</strong> 或 <strong>GGGG</strong>)</li>\n</ul>\n\n<p>返回结果表以 sample_id <strong>升序</strong> 排序<em>。</em></p>\n\n<p>结果格式如下所示。</p>\n\n<p> </p>\n\n<p><strong class=\"example\">示例:</strong></p>\n\n<div class=\"example-block\">\n<p><strong>输入:</strong></p>\n\n<p>Samples 表:</p>\n\n<pre class=\"example-io\">\n+-----------+------------------+-----------+\n| sample_id | dna_sequence | species |\n+-----------+------------------+-----------+\n| 1 | ATGCTAGCTAGCTAA | Human |\n| 2 | GGGTCAATCATC | Human |\n| 3 | ATATATCGTAGCTA | Human |\n| 4 | ATGGGGTCATCATAA | Mouse |\n| 5 | TCAGTCAGTCAG | Mouse |\n| 6 | ATATCGCGCTAG | Zebrafish |\n| 7 | CGTATGCGTCGTA | Zebrafish |\n+-----------+------------------+-----------+\n</pre>\n\n<p><strong>输出:</strong></p>\n\n<pre class=\"example-io\">\n+-----------+------------------+-------------+-------------+------------+------------+------------+\n| sample_id | dna_sequence | species | has_start | has_stop | has_atat | has_ggg |\n+-----------+------------------+-------------+-------------+------------+------------+------------+\n| 1 | ATGCTAGCTAGCTAA | Human | 1 | 1 | 0 | 0 |\n| 2 | GGGTCAATCATC | Human | 0 | 0 | 0 | 1 |\n| 3 | ATATATCGTAGCTA | Human | 0 | 0 | 1 | 0 |\n| 4 | ATGGGGTCATCATAA | Mouse | 
1 | 1 | 0 | 1 |\n| 5 | TCAGTCAGTCAG | Mouse | 0 | 0 | 0 | 0 |\n| 6 | ATATCGCGCTAG | Zebrafish | 0 | 1 | 1 | 0 |\n| 7 | CGTATGCGTCGTA | Zebrafish | 0 | 0 | 0 | 0 |\n+-----------+------------------+-------------+-------------+------------+------------+------------+\n</pre>\n\n<p><strong>解释:</strong></p>\n\n<ul>\n\t<li>样本 1(ATGCTAGCTAGCTAA):\n\t<ul>\n\t\t<li>以 ATG 开头(has_start = 1)</li>\n\t\t<li>以 TAA 结尾(has_stop = 1)</li>\n\t\t<li>不包含 ATAT(has_atat = 0)</li>\n\t\t<li>不包含至少 3 个连续 ‘G’(has_ggg = 0)</li>\n\t</ul>\n\t</li>\n\t<li>样本 2(GGGTCAATCATC):\n\t<ul>\n\t\t<li>不以 ATG 开头(has_start = 0)</li>\n\t\t<li>不以 TAA,TAG 或 TGA 结尾(has_stop = 0)</li>\n\t\t<li>不包含 ATAT(has_atat = 0)</li>\n\t\t<li>包含 GGG(has_ggg = 1)</li>\n\t</ul>\n\t</li>\n\t<li>样本 3(ATATATCGTAGCTA):\n\t<ul>\n\t\t<li>不以 ATG 开头(has_start = 0)</li>\n\t\t<li>不以 TAA,TAG 或 TGA 结尾(has_stop = 0)</li>\n\t\t<li>包含 ATAT(has_atat = 1)</li>\n\t\t<li>不包含至少 3 个连续 ‘G’(has_ggg = 0)</li>\n\t</ul>\n\t</li>\n\t<li>样本 4(ATGGGGTCATCATAA):\n\t<ul>\n\t\t<li>以 ATG 开头(has_start = 1)</li>\n\t\t<li>以 TAA 结尾(has_stop = 1)</li>\n\t\t<li>不包含 ATAT(has_atat = 0)</li>\n\t\t<li>包含 GGGG(has_ggg = 1)</li>\n\t</ul>\n\t</li>\n\t<li>样本 5(TCAGTCAGTCAG):\n\t<ul>\n\t\t<li>不匹配任何模式(所有字段 = 0)</li>\n\t</ul>\n\t</li>\n\t<li>样本 6(ATATCGCGCTAG):\n\t<ul>\n\t\t<li>不以 ATG 开头(has_start = 0)</li>\n\t\t<li>以 TAG 结尾(has_stop = 1)</li>\n\t\t<li>包含 ATAT(has_atat = 1)</li>\n\t\t<li>不包含至少 3 个连续 ‘G’(has_ggg = 0)</li>\n\t</ul>\n\t</li>\n\t<li>样本 7(CGTATGCGTCGTA):\n\t<ul>\n\t\t<li>不以 ATG 开头(has_start = 0)</li>\n\t\t<li>不以 TAA,TAG 或 TGA 结尾(has_stop = 0)</li>\n\t\t<li>不包含 ATAT(has_atat = 0)</li>\n\t\t<li>不包含至少 3 个连续 ‘G’(has_ggg = 0)</li>\n\t</ul>\n\t</li>\n</ul>\n\n<p><strong>注意:</strong></p>\n\n<ul>\n\t<li>结果以 sample_id 升序排序</li>\n\t<li>对于每个模式,1 表示该模式存在,0 表示不存在</li>\n</ul>\n</div>\n",
|
||
"isPaidOnly": false,
|
||
"difficulty": "Medium",
|
||
"likes": 0,
|
||
"dislikes": 0,
|
||
"isLiked": null,
|
||
"similarQuestions": "[]",
|
||
"contributors": [],
|
||
"langToValidPlayground": "{\"cpp\": false, \"java\": false, \"python\": false, \"python3\": false, \"mysql\": false, \"mssql\": false, \"oraclesql\": false, \"c\": false, \"csharp\": false, \"javascript\": false, \"typescript\": false, \"bash\": false, \"php\": false, \"swift\": false, \"kotlin\": false, \"dart\": false, \"golang\": false, \"ruby\": false, \"scala\": false, \"html\": false, \"pythonml\": false, \"rust\": false, \"racket\": false, \"erlang\": false, \"elixir\": false, \"pythondata\": false, \"react\": false, \"vanillajs\": false, \"postgresql\": false, \"cangjie\": false}",
|
||
"topicTags": [],
|
||
"companyTagStats": null,
|
||
"codeSnippets": [
|
||
{
|
||
"lang": "MySQL",
|
||
"langSlug": "mysql",
|
||
"code": "# Write your MySQL query statement below",
|
||
"__typename": "CodeSnippetNode"
|
||
},
|
||
{
|
||
"lang": "MS SQL Server",
|
||
"langSlug": "mssql",
|
||
"code": "/* Write your T-SQL query statement below */",
|
||
"__typename": "CodeSnippetNode"
|
||
},
|
||
{
|
||
"lang": "Oracle",
|
||
"langSlug": "oraclesql",
|
||
"code": "/* Write your PL/SQL query statement below */",
|
||
"__typename": "CodeSnippetNode"
|
||
},
|
||
{
|
||
"lang": "Pandas",
|
||
"langSlug": "pythondata",
|
||
"code": "import pandas as pd\n\ndef analyze_dna_patterns(samples: pd.DataFrame) -> pd.DataFrame:\n ",
|
||
"__typename": "CodeSnippetNode"
|
||
},
|
||
{
|
||
"lang": "PostgreSQL",
|
||
"langSlug": "postgresql",
|
||
"code": "-- Write your PostgreSQL query statement below",
|
||
"__typename": "CodeSnippetNode"
|
||
}
|
||
],
|
||
"stats": "{\"totalAccepted\": \"258\", \"totalSubmission\": \"313\", \"totalAcceptedRaw\": 258, \"totalSubmissionRaw\": 313, \"acRate\": \"82.4%\"}",
|
||
"hints": [],
|
||
"solution": null,
|
||
"status": null,
|
||
"sampleTestCase": "{\"headers\":{\"Samples\":[\"sample_id\",\"dna_sequence\",\"species\"]},\"rows\":{\"Samples\":[[1,\"ATGCTAGCTAGCTAA\",\"Human\"],[2,\"GGGTCAATCATC\",\"Human\"],[3,\"ATATATCGTAGCTA\",\"Human\"],[4,\"ATGGGGTCATCATAA\",\"Mouse\"],[5,\"TCAGTCAGTCAG\",\"Mouse\"],[6,\"ATATCGCGCTAG\",\"Zebrafish\"],[7,\"CGTATGCGTCGTA\",\"Zebrafish\"]]}}",
|
||
"metaData": "{\"mysql\":[\"CREATE TABLE if not exists Samples (\\n sample_id INT,\\n dna_sequence VARCHAR(255),\\n species VARCHAR(100)\\n)\"],\"mssql\":[\"CREATE TABLE Samples (\\n sample_id INT,\\n dna_sequence VARCHAR(255),\\n species VARCHAR(100)\\n)\"],\"oraclesql\":[\"CREATE TABLE Samples (\\n sample_id NUMBER,\\n dna_sequence VARCHAR2(255),\\n species VARCHAR2(100)\\n)\"],\"database\":true,\"name\":\"analyze_dna_patterns\",\"postgresql\":[\"CREATE TABLE IF NOT EXISTS Samples (\\n sample_id INT,\\n dna_sequence TEXT NOT NULL,\\n species VARCHAR(100) NOT NULL\\n);\\n\"],\"pythondata\":[\"Samples = pd.DataFrame({\\n 'sample_id': pd.Series(dtype='int'), # Equivalent to SERIAL\\/INTEGER\\n 'dna_sequence': pd.Series(dtype='string'), # Equivalent to TEXT\\/VARCHAR\\n 'species': pd.Series(dtype='string') # Equivalent to VARCHAR(100)\\n})\"],\"database_schema\":{\"Samples\":{\"sample_id\":\"INT\",\"dna_sequence\":\"VARCHAR(255)\",\"species\":\"VARCHAR(100)\"}}}",
|
||
"judgerAvailable": true,
|
||
"judgeType": "large",
|
||
"mysqlSchemas": [
|
||
"CREATE TABLE if not exists Samples (\n sample_id INT,\n dna_sequence VARCHAR(255),\n species VARCHAR(100)\n)",
|
||
"Truncate table Samples",
|
||
"insert into Samples (sample_id, dna_sequence, species) values ('1', 'ATGCTAGCTAGCTAA', 'Human')",
|
||
"insert into Samples (sample_id, dna_sequence, species) values ('2', 'GGGTCAATCATC', 'Human')",
|
||
"insert into Samples (sample_id, dna_sequence, species) values ('3', 'ATATATCGTAGCTA', 'Human')",
|
||
"insert into Samples (sample_id, dna_sequence, species) values ('4', 'ATGGGGTCATCATAA', 'Mouse')",
|
||
"insert into Samples (sample_id, dna_sequence, species) values ('5', 'TCAGTCAGTCAG', 'Mouse')",
|
||
"insert into Samples (sample_id, dna_sequence, species) values ('6', 'ATATCGCGCTAG', 'Zebrafish')",
|
||
"insert into Samples (sample_id, dna_sequence, species) values ('7', 'CGTATGCGTCGTA', 'Zebrafish')"
|
||
],
|
||
"enableRunCode": true,
|
||
"envInfo": "{\"mysql\":[\"MySQL\",\"<p>\\u7248\\u672c\\uff1a<code>MySQL 8.0<\\/code><\\/p>\"],\"mssql\":[\"MS SQL Server\",\"<p>mssql server 2019.<\\/p>\"],\"oraclesql\":[\"Oracle\",\"<p>Oracle Sql 11.2.<\\/p>\"],\"pythondata\":[\"Pandas\",\"<p>Python 3.10 with Pandas 2.2.2 and NumPy 1.26.4<\\/p>\"],\"postgresql\":[\"PostgreSQL\",\"<p>PostgreSQL 16<\\/p>\"]}",
|
||
"book": null,
|
||
"isSubscribed": false,
|
||
"isDailyQuestion": false,
|
||
"dailyRecordStatus": null,
|
||
"editorType": "CKEDITOR",
|
||
"ugcQuestionId": null,
|
||
"style": "LEETCODE",
|
||
"exampleTestcases": "{\"headers\":{\"Samples\":[\"sample_id\",\"dna_sequence\",\"species\"]},\"rows\":{\"Samples\":[[1,\"ATGCTAGCTAGCTAA\",\"Human\"],[2,\"GGGTCAATCATC\",\"Human\"],[3,\"ATATATCGTAGCTA\",\"Human\"],[4,\"ATGGGGTCATCATAA\",\"Mouse\"],[5,\"TCAGTCAGTCAG\",\"Mouse\"],[6,\"ATATCGCGCTAG\",\"Zebrafish\"],[7,\"CGTATGCGTCGTA\",\"Zebrafish\"]]}}",
|
||
"__typename": "QuestionNode"
|
||
}
|
||
}
|
||
} |