{ "data": { "question": { "questionId": "3816", "questionFrontendId": "3475", "categoryTitle": "Database", "boundTopicId": 3593036, "title": "DNA Pattern Recognition ", "titleSlug": "dna-pattern-recognition", "content": "

Table: Samples

\n\n
\n+----------------+---------+\n| Column Name    | Type    | \n+----------------+---------+\n| sample_id      | int     |\n| dna_sequence   | varchar |\n| species        | varchar |\n+----------------+---------+\nsample_id is the unique key for this table.\nEach row contains a DNA sequence represented as a string of characters (A, T, G, C) and the species it was collected from.\n
\n\n

Biologists are studying basic patterns in DNA sequences. Write a solution to identify sample_id with the following patterns:

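\n- Sequences that start with ATG (a common start codon)\n- Sequences that end with either TAA, TAG, or TGA (stop codons)\n- Sequences containing the motif ATAT (a simple repeated pattern)\n- Sequences that have at least 3 consecutive G (such as GGG or GGGG)\n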

Return the result table ordered by sample_id in ascending order.

\n\n

The result format is in the following example.

\n\n

 

\n

Example:

\n\n
\n

Input:

\n\n

Samples table:

\n\n
\n+-----------+------------------+-----------+\n| sample_id | dna_sequence     | species   |\n+-----------+------------------+-----------+\n| 1         | ATGCTAGCTAGCTAA  | Human     |\n| 2         | GGGTCAATCATC     | Human     |\n| 3         | ATATATCGTAGCTA   | Human     |\n| 4         | ATGGGGTCATCATAA  | Mouse     |\n| 5         | TCAGTCAGTCAG     | Mouse     |\n| 6         | ATATCGCGCTAG     | Zebrafish |\n| 7         | CGTATGCGTCGTA    | Zebrafish |\n+-----------+------------------+-----------+\n
\n\n

Output:

\n\n
\n+-----------+------------------+-------------+-------------+------------+------------+------------+\n| sample_id | dna_sequence     | species     | has_start   | has_stop   | has_atat   | has_ggg    |\n+-----------+------------------+-------------+-------------+------------+------------+------------+\n| 1         | ATGCTAGCTAGCTAA  | Human       | 1           | 1          | 0          | 0          |\n| 2         | GGGTCAATCATC     | Human       | 0           | 0          | 0          | 1          |\n| 3         | ATATATCGTAGCTA   | Human       | 0           | 0          | 1          | 0          |\n| 4         | ATGGGGTCATCATAA  | Mouse       | 1           | 1          | 0          | 1          |\n| 5         | TCAGTCAGTCAG     | Mouse       | 0           | 0          | 0          | 0          |\n| 6         | ATATCGCGCTAG     | Zebrafish   | 0           | 1          | 1          | 0          |\n| 7         | CGTATGCGTCGTA    | Zebrafish   | 0           | 0          | 0          | 0          |\n+-----------+------------------+-------------+-------------+------------+------------+------------+\n
\n\n

Explanation:

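\n- Sample 1 (ATGCTAGCTAGCTAA): starts with ATG (has_start = 1), ends with TAA (has_stop = 1), does not contain ATAT (has_atat = 0), and has no run of 3 or more G (has_ggg = 0).\n- Sample 2 (GGGTCAATCATC): does not start with ATG (has_start = 0), does not end with a stop codon (has_stop = 0), does not contain ATAT (has_atat = 0), and contains GGG (has_ggg = 1).\n- Sample 3 (ATATATCGTAGCTA): does not start with ATG (has_start = 0), does not end with a stop codon (has_stop = 0), contains ATAT (has_atat = 1), and has no run of 3 or more G (has_ggg = 0).\n- The remaining samples are evaluated by the same rules.\n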

Note:

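\n- The result is ordered by sample_id in ascending order.\n- For each pattern, 1 indicates the pattern is present and 0 indicates it is not present.\n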
\n", "translatedTitle": "DNA 模式识别", "translatedContent": "

表:Samples

\n\n
\n+----------------+---------+\n| Column Name    | Type    | \n+----------------+---------+\n| sample_id      | int     |\n| dna_sequence   | varchar |\n| species        | varchar |\n+----------------+---------+\nsample_id 是这张表的唯一主键。\n每一行包含一个 DNA 序列（由字符 A、T、G、C 组成的字符串）以及采集该序列的物种。\n
\n\n

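生物学家正在研究 DNA 序列中的基本模式。编写一个解决方案以识别具有以下模式的 sample_id：

\n- 以 ATG 开头的序列（一个常见的起始密码子）\n- 以 TAA、TAG 或 TGA 结尾的序列（终止密码子）\n- 包含基序 ATAT 的序列（一个简单的重复模式）\n- 含有至少 3 个连续 G 的序列（如 GGG 或 GGGG）\n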

返回结果表，并以 sample_id 升序排序。

\n\n

结果格式如下所示。

\n\n

 

\n\n

示例:

\n\n
\n

输入:

\n\n

Samples 表:

\n\n
\n+-----------+------------------+-----------+\n| sample_id | dna_sequence     | species   |\n+-----------+------------------+-----------+\n| 1         | ATGCTAGCTAGCTAA  | Human     |\n| 2         | GGGTCAATCATC     | Human     |\n| 3         | ATATATCGTAGCTA   | Human     |\n| 4         | ATGGGGTCATCATAA  | Mouse     |\n| 5         | TCAGTCAGTCAG     | Mouse     |\n| 6         | ATATCGCGCTAG     | Zebrafish |\n| 7         | CGTATGCGTCGTA    | Zebrafish |\n+-----------+------------------+-----------+\n
\n\n

输出:

\n\n
\n+-----------+------------------+-------------+-------------+------------+------------+------------+\n| sample_id | dna_sequence     | species     | has_start   | has_stop   | has_atat   | has_ggg    |\n+-----------+------------------+-------------+-------------+------------+------------+------------+\n| 1         | ATGCTAGCTAGCTAA  | Human       | 1           | 1          | 0          | 0          |\n| 2         | GGGTCAATCATC     | Human       | 0           | 0          | 0          | 1          |\n| 3         | ATATATCGTAGCTA   | Human       | 0           | 0          | 1          | 0          |\n| 4         | ATGGGGTCATCATAA  | Mouse       | 1           | 1          | 0          | 1          |\n| 5         | TCAGTCAGTCAG     | Mouse       | 0           | 0          | 0          | 0          |\n| 6         | ATATCGCGCTAG     | Zebrafish   | 0           | 1          | 1          | 0          |\n| 7         | CGTATGCGTCGTA    | Zebrafish   | 0           | 0          | 0          | 0          |\n+-----------+------------------+-------------+-------------+------------+------------+------------+\n
\n\n

解释:

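\n- 样本 1（ATGCTAGCTAGCTAA）：以 ATG 开头（has_start = 1），以 TAA 结尾（has_stop = 1），不包含 ATAT（has_atat = 0），不包含至少 3 个连续的 G（has_ggg = 0）。\n- 样本 2（GGGTCAATCATC）：不以 ATG 开头（has_start = 0），不以终止密码子结尾（has_stop = 0），不包含 ATAT（has_atat = 0），包含 GGG（has_ggg = 1）。\n- 样本 3（ATATATCGTAGCTA）：不以 ATG 开头（has_start = 0），不以终止密码子结尾（has_stop = 0），包含 ATAT（has_atat = 1），不包含至少 3 个连续的 G（has_ggg = 0）。\n- 其余样本按相同规则判断。\n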

注意:

\n- 结果以 sample_id 升序排序。\n- 对于每个模式，1 表示该模式存在，0 表示该模式不存在。\n
\n", "isPaidOnly": false, "difficulty": "Medium", "likes": 0, "dislikes": 0, "isLiked": null, "similarQuestions": "[]", "contributors": [], "langToValidPlayground": "{\"cpp\": false, \"java\": false, \"python\": false, \"python3\": false, \"mysql\": false, \"mssql\": false, \"oraclesql\": false, \"c\": false, \"csharp\": false, \"javascript\": false, \"typescript\": false, \"bash\": false, \"php\": false, \"swift\": false, \"kotlin\": false, \"dart\": false, \"golang\": false, \"ruby\": false, \"scala\": false, \"html\": false, \"pythonml\": false, \"rust\": false, \"racket\": false, \"erlang\": false, \"elixir\": false, \"pythondata\": false, \"react\": false, \"vanillajs\": false, \"postgresql\": false, \"cangjie\": false}", "topicTags": [], "companyTagStats": null, "codeSnippets": [ { "lang": "MySQL", "langSlug": "mysql", "code": "# Write your MySQL query statement below", "__typename": "CodeSnippetNode" }, { "lang": "MS SQL Server", "langSlug": "mssql", "code": "/* Write your T-SQL query statement below */", "__typename": "CodeSnippetNode" }, { "lang": "Oracle", "langSlug": "oraclesql", "code": "/* Write your PL/SQL query statement below */", "__typename": "CodeSnippetNode" }, { "lang": "Pandas", "langSlug": "pythondata", "code": "import pandas as pd\n\ndef analyze_dna_patterns(samples: pd.DataFrame) -> pd.DataFrame:\n ", "__typename": "CodeSnippetNode" }, { "lang": "PostgreSQL", "langSlug": "postgresql", "code": "-- Write your PostgreSQL query statement below", "__typename": "CodeSnippetNode" } ], "stats": "{\"totalAccepted\": \"258\", \"totalSubmission\": \"313\", \"totalAcceptedRaw\": 258, \"totalSubmissionRaw\": 313, \"acRate\": \"82.4%\"}", "hints": [], "solution": null, "status": null, "sampleTestCase": 
"{\"headers\":{\"Samples\":[\"sample_id\",\"dna_sequence\",\"species\"]},\"rows\":{\"Samples\":[[1,\"ATGCTAGCTAGCTAA\",\"Human\"],[2,\"GGGTCAATCATC\",\"Human\"],[3,\"ATATATCGTAGCTA\",\"Human\"],[4,\"ATGGGGTCATCATAA\",\"Mouse\"],[5,\"TCAGTCAGTCAG\",\"Mouse\"],[6,\"ATATCGCGCTAG\",\"Zebrafish\"],[7,\"CGTATGCGTCGTA\",\"Zebrafish\"]]}}", "metaData": "{\"mysql\":[\"CREATE TABLE if not exists Samples (\\n sample_id INT,\\n dna_sequence VARCHAR(255),\\n species VARCHAR(100)\\n)\"],\"mssql\":[\"CREATE TABLE Samples (\\n sample_id INT,\\n dna_sequence VARCHAR(255),\\n species VARCHAR(100)\\n)\"],\"oraclesql\":[\"CREATE TABLE Samples (\\n sample_id NUMBER,\\n dna_sequence VARCHAR2(255),\\n species VARCHAR2(100)\\n)\"],\"database\":true,\"name\":\"analyze_dna_patterns\",\"postgresql\":[\"CREATE TABLE IF NOT EXISTS Samples (\\n sample_id INT,\\n dna_sequence TEXT NOT NULL,\\n species VARCHAR(100) NOT NULL\\n);\\n\"],\"pythondata\":[\"Samples = pd.DataFrame({\\n 'sample_id': pd.Series(dtype='int'), # Equivalent to SERIAL\\/INTEGER\\n 'dna_sequence': pd.Series(dtype='string'), # Equivalent to TEXT\\/VARCHAR\\n 'species': pd.Series(dtype='string') # Equivalent to VARCHAR(100)\\n})\"],\"database_schema\":{\"Samples\":{\"sample_id\":\"INT\",\"dna_sequence\":\"VARCHAR(255)\",\"species\":\"VARCHAR(100)\"}}}", "judgerAvailable": true, "judgeType": "large", "mysqlSchemas": [ "CREATE TABLE if not exists Samples (\n sample_id INT,\n dna_sequence VARCHAR(255),\n species VARCHAR(100)\n)", "Truncate table Samples", "insert into Samples (sample_id, dna_sequence, species) values ('1', 'ATGCTAGCTAGCTAA', 'Human')", "insert into Samples (sample_id, dna_sequence, species) values ('2', 'GGGTCAATCATC', 'Human')", "insert into Samples (sample_id, dna_sequence, species) values ('3', 'ATATATCGTAGCTA', 'Human')", "insert into Samples (sample_id, dna_sequence, species) values ('4', 'ATGGGGTCATCATAA', 'Mouse')", "insert into Samples (sample_id, dna_sequence, species) values ('5', 'TCAGTCAGTCAG', 
'Mouse')", "insert into Samples (sample_id, dna_sequence, species) values ('6', 'ATATCGCGCTAG', 'Zebrafish')", "insert into Samples (sample_id, dna_sequence, species) values ('7', 'CGTATGCGTCGTA', 'Zebrafish')" ], "enableRunCode": true, "envInfo": "{\"mysql\":[\"MySQL\",\"

<p>\\u7248\\u672c\\uff1a<code>MySQL 8.0<\\/code><\\/p>\"],\"mssql\":[\"MS SQL Server\",\"<p>mssql server 2019.<\\/p>\"],\"oraclesql\":[\"Oracle\",\"<p>Oracle Sql 11.2.<\\/p>\"],\"pythondata\":[\"Pandas\",\"<p>Python 3.10 with Pandas 2.2.2 and NumPy 1.26.4<\\/p>\"],\"postgresql\":[\"PostgreSQL\",\"<p>PostgreSQL 16<\\/p>\"]}", "book": null, "isSubscribed": false, "isDailyQuestion": false, "dailyRecordStatus": null, "editorType": "CKEDITOR", "ugcQuestionId": null, "style": "LEETCODE", "exampleTestcases": "{\"headers\":{\"Samples\":[\"sample_id\",\"dna_sequence\",\"species\"]},\"rows\":{\"Samples\":[[1,\"ATGCTAGCTAGCTAA\",\"Human\"],[2,\"GGGTCAATCATC\",\"Human\"],[3,\"ATATATCGTAGCTA\",\"Human\"],[4,\"ATGGGGTCATCATAA\",\"Mouse\"],[5,\"TCAGTCAGTCAG\",\"Mouse\"],[6,\"ATATCGCGCTAG\",\"Zebrafish\"],[7,\"CGTATGCGTCGTA\",\"Zebrafish\"]]}}", "__typename": "QuestionNode" } } }